diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..505e1c096540fcd7390f61b999009967089ba760 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,55 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/tokenizer.json filter=lfs diff=lfs merge=lfs -text +systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/tokenizer.json filter=lfs diff=lfs merge=lfs -text +substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3d96176467b90f22510bca77d69b05a7e15b7128 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1155/trainer_state.json @@ -0,0 +1,297 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1155, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0079918928159476e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5a1e1ac451dcfbf57c9a5bad7455584a42bdae50 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1540/trainer_state.json @@ -0,0 +1,378 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1540, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3422010646683433e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..49ecf8d0e41abc378d8a6186b566ef2c79cfe10b --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-1925/trainer_state.json @@ -0,0 +1,469 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1925, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + }, + { + "entropy": 0.29751914612312413, + "epoch": 4.025990903183885, + "grad_norm": 3.8319175243377686, + "learning_rate": 5.340781994847969e-05, + "loss": 1.1474579620361327, + "mean_token_accuracy": 0.9113474434344613, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.22374473124742508, + "epoch": 4.155945419103314, + "grad_norm": 4.0295023918151855, + "learning_rate": 5.197913368976308e-05, + "loss": 0.8647676849365235, + "mean_token_accuracy": 0.9303225663304329, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2204760881140828, + "epoch": 4.2858999350227425, + "grad_norm": 2.7091262340545654, + "learning_rate": 5.0517150240410225e-05, + "loss": 0.8546311950683594, + "mean_token_accuracy": 0.9325515595078469, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.22379870273172855, + "epoch": 4.41585445094217, + "grad_norm": 3.676753044128418, + "learning_rate": 4.90248736102854e-05, + "loss": 0.8702528381347656, + "mean_token_accuracy": 0.9302561604976654, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.22574612841010094, + "epoch": 4.545808966861598, + "grad_norm": 3.4745290279388428, + "learning_rate": 4.750537005415305e-05, + "loss": 0.8742953491210937, + "mean_token_accuracy": 0.9304392230510712, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2232594309747219, + "epoch": 4.675763482781027, + "grad_norm": 2.898700475692749, + "learning_rate": 4.596176177129214e-05, + "loss": 0.8619278717041016, + "mean_token_accuracy": 0.930913093984127, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.22699439719319345, + "epoch": 4.805717998700455, + "grad_norm": 4.208059310913086, + "learning_rate": 4.4397220490158516e-05, + "loss": 0.8736541748046875, + "mean_token_accuracy": 0.9296354326605797, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.22327437780797482, + "epoch": 4.935672514619883, + "grad_norm": 4.770328998565674, + "learning_rate": 4.281496095127722e-05, + "loss": 0.8705735778808594, + "mean_token_accuracy": 0.9296216520667077, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.30851531530228943, + "eval_loss": 0.7023613452911377, + "eval_mean_token_accuracy": 0.8488265181390139, + "eval_num_tokens": 4723910.0, + "eval_runtime": 93.5227, + "eval_samples_per_second": 17.718, + "eval_steps_per_second": 2.224, + "step": 1925 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6766967130248312e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..952425ae653d7c11b1cc044d31d6989ebdca0d5a --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2310/trainer_state.json @@ -0,0 +1,560 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 2310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + }, + { + "entropy": 0.29751914612312413, + "epoch": 4.025990903183885, + "grad_norm": 3.8319175243377686, + "learning_rate": 5.340781994847969e-05, + "loss": 1.1474579620361327, + "mean_token_accuracy": 0.9113474434344613, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.22374473124742508, + "epoch": 4.155945419103314, + "grad_norm": 4.0295023918151855, + "learning_rate": 5.197913368976308e-05, + "loss": 0.8647676849365235, + "mean_token_accuracy": 0.9303225663304329, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2204760881140828, + "epoch": 4.2858999350227425, + "grad_norm": 2.7091262340545654, + "learning_rate": 5.0517150240410225e-05, + "loss": 0.8546311950683594, + "mean_token_accuracy": 0.9325515595078469, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.22379870273172855, + "epoch": 4.41585445094217, + "grad_norm": 3.676753044128418, + "learning_rate": 4.90248736102854e-05, + "loss": 0.8702528381347656, + "mean_token_accuracy": 0.9302561604976654, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.22574612841010094, + "epoch": 4.545808966861598, + "grad_norm": 3.4745290279388428, + "learning_rate": 4.750537005415305e-05, + "loss": 0.8742953491210937, + "mean_token_accuracy": 0.9304392230510712, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2232594309747219, + "epoch": 4.675763482781027, + "grad_norm": 2.898700475692749, + "learning_rate": 4.596176177129214e-05, + "loss": 0.8619278717041016, + "mean_token_accuracy": 0.930913093984127, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.22699439719319345, + "epoch": 4.805717998700455, + "grad_norm": 4.208059310913086, + "learning_rate": 4.4397220490158516e-05, + "loss": 0.8736541748046875, + "mean_token_accuracy": 0.9296354326605797, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.22327437780797482, + "epoch": 4.935672514619883, + "grad_norm": 4.770328998565674, + "learning_rate": 4.281496095127722e-05, + "loss": 0.8705735778808594, + "mean_token_accuracy": 0.9296216520667077, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.30851531530228943, + "eval_loss": 0.7023613452911377, + "eval_mean_token_accuracy": 0.8488265181390139, + "eval_num_tokens": 4723910.0, + "eval_runtime": 93.5227, + "eval_samples_per_second": 17.718, + "eval_steps_per_second": 2.224, + "step": 1925 + }, + { + "entropy": 0.1837335507848754, + "epoch": 5.064977257959714, + "grad_norm": 3.1876420974731445, + "learning_rate": 4.1218234301755803e-05, + "loss": 0.7019867706298828, + "mean_token_accuracy": 0.943129480484143, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1513645588979125, + "epoch": 5.1949317738791425, + "grad_norm": 5.2119059562683105, + "learning_rate": 3.961032141499117e-05, + "loss": 0.5882163619995118, + "mean_token_accuracy": 0.9523130711913109, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.1646271352469921, + "epoch": 5.32488628979857, + "grad_norm": 3.4650797843933105, + "learning_rate": 3.799452614929641e-05, + "loss": 0.6260359191894531, + "mean_token_accuracy": 0.9499588277935982, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.1620502556487918, + "epoch": 5.454840805717999, + "grad_norm": 4.383810997009277, + "learning_rate": 3.637416855929934e-05, + "loss": 0.619534568786621, + "mean_token_accuracy": 0.9506754752993584, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.16391102869063615, + "epoch": 5.584795321637427, + "grad_norm": 3.5354411602020264, + "learning_rate": 3.475257807406162e-05, + "loss": 0.631606559753418, + "mean_token_accuracy": 0.9495515289902687, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.15723124787211418, + "epoch": 5.714749837556855, + "grad_norm": 2.841092824935913, + "learning_rate": 3.313308665593597e-05, + "loss": 0.605863380432129, + "mean_token_accuracy": 0.9508947885036468, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.15846401400864124, + "epoch": 5.844704353476283, + "grad_norm": 3.759674549102783, + "learning_rate": 3.151902195421776e-05, + "loss": 0.6139367294311523, + "mean_token_accuracy": 0.9504388865828514, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.16854628551751374, + "epoch": 5.974658869395712, + "grad_norm": 3.469398260116577, + "learning_rate": 2.991370046765923e-05, + "loss": 0.6406163787841797, + "mean_token_accuracy": 0.9481290274858475, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.2671854439406441, + "eval_loss": 0.8231481909751892, + "eval_mean_token_accuracy": 0.8452930894608681, + "eval_num_tokens": 5668692.0, + "eval_runtime": 93.4211, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.226, + "step": 2310 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.011472645767713e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..01c92d5ca6c961a0aacb0dc9965f3c17788a91de --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-2695/trainer_state.json @@ -0,0 +1,641 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 2695, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + }, + { + "entropy": 0.29751914612312413, + "epoch": 4.025990903183885, + "grad_norm": 3.8319175243377686, + "learning_rate": 5.340781994847969e-05, + "loss": 1.1474579620361327, + "mean_token_accuracy": 0.9113474434344613, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.22374473124742508, + "epoch": 4.155945419103314, + "grad_norm": 4.0295023918151855, + "learning_rate": 5.197913368976308e-05, + "loss": 0.8647676849365235, + "mean_token_accuracy": 0.9303225663304329, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2204760881140828, + "epoch": 4.2858999350227425, + "grad_norm": 2.7091262340545654, + "learning_rate": 5.0517150240410225e-05, + "loss": 0.8546311950683594, + "mean_token_accuracy": 0.9325515595078469, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.22379870273172855, + "epoch": 4.41585445094217, + "grad_norm": 3.676753044128418, + "learning_rate": 4.90248736102854e-05, + "loss": 0.8702528381347656, + "mean_token_accuracy": 0.9302561604976654, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.22574612841010094, + "epoch": 4.545808966861598, + "grad_norm": 3.4745290279388428, + "learning_rate": 4.750537005415305e-05, + "loss": 0.8742953491210937, + "mean_token_accuracy": 0.9304392230510712, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2232594309747219, + "epoch": 4.675763482781027, + "grad_norm": 2.898700475692749, + "learning_rate": 4.596176177129214e-05, + "loss": 0.8619278717041016, + "mean_token_accuracy": 0.930913093984127, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.22699439719319345, + "epoch": 4.805717998700455, + "grad_norm": 4.208059310913086, + "learning_rate": 4.4397220490158516e-05, + "loss": 0.8736541748046875, + "mean_token_accuracy": 0.9296354326605797, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.22327437780797482, + "epoch": 4.935672514619883, + "grad_norm": 4.770328998565674, + "learning_rate": 4.281496095127722e-05, + "loss": 0.8705735778808594, + "mean_token_accuracy": 0.9296216520667077, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.30851531530228943, + "eval_loss": 0.7023613452911377, + "eval_mean_token_accuracy": 0.8488265181390139, + "eval_num_tokens": 4723910.0, + "eval_runtime": 93.5227, + "eval_samples_per_second": 17.718, + "eval_steps_per_second": 2.224, + "step": 1925 + }, + { + "entropy": 0.1837335507848754, + "epoch": 5.064977257959714, + "grad_norm": 3.1876420974731445, + "learning_rate": 4.1218234301755803e-05, + "loss": 0.7019867706298828, + "mean_token_accuracy": 0.943129480484143, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1513645588979125, + "epoch": 5.1949317738791425, + "grad_norm": 5.2119059562683105, + "learning_rate": 3.961032141499117e-05, + "loss": 0.5882163619995118, + "mean_token_accuracy": 0.9523130711913109, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.1646271352469921, + "epoch": 5.32488628979857, + "grad_norm": 3.4650797843933105, + "learning_rate": 3.799452614929641e-05, + "loss": 0.6260359191894531, + "mean_token_accuracy": 0.9499588277935982, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.1620502556487918, + "epoch": 5.454840805717999, + "grad_norm": 4.383810997009277, + "learning_rate": 3.637416855929934e-05, + "loss": 0.619534568786621, + "mean_token_accuracy": 0.9506754752993584, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.16391102869063615, + "epoch": 5.584795321637427, + "grad_norm": 3.5354411602020264, + "learning_rate": 3.475257807406162e-05, + "loss": 0.631606559753418, + "mean_token_accuracy": 0.9495515289902687, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.15723124787211418, + "epoch": 5.714749837556855, + "grad_norm": 2.841092824935913, + "learning_rate": 3.313308665593597e-05, + "loss": 0.605863380432129, + "mean_token_accuracy": 0.9508947885036468, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.15846401400864124, + "epoch": 5.844704353476283, + "grad_norm": 3.759674549102783, + "learning_rate": 3.151902195421776e-05, + "loss": 0.6139367294311523, + "mean_token_accuracy": 0.9504388865828514, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.16854628551751374, + "epoch": 5.974658869395712, + "grad_norm": 3.469398260116577, + "learning_rate": 2.991370046765923e-05, + "loss": 0.6406163787841797, + "mean_token_accuracy": 0.9481290274858475, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.2671854439406441, + "eval_loss": 0.8231481909751892, + "eval_mean_token_accuracy": 0.8452930894608681, + "eval_num_tokens": 5668692.0, + "eval_runtime": 93.4211, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.226, + "step": 2310 + }, + { + "entropy": 0.12965927387227366, + "epoch": 6.1039636127355426, + "grad_norm": 4.258986473083496, + "learning_rate": 2.8320420729895018e-05, + "loss": 0.48968967437744143, + "mean_token_accuracy": 0.960972131197177, + "num_tokens": 5766129.0, + "step": 2350 + }, + { + "entropy": 0.1159947993606329, + "epoch": 6.23391812865497, + "grad_norm": 2.889965534210205, + "learning_rate": 2.6742456531781548e-05, + "loss": 0.43932266235351564, + "mean_token_accuracy": 0.9663794213533401, + "num_tokens": 5888746.0, + "step": 2400 + }, + { + "entropy": 0.11464667120948434, + "epoch": 6.363872644574399, + "grad_norm": 2.719203472137451, + "learning_rate": 2.518305019457679e-05, + "loss": 0.42842094421386717, + "mean_token_accuracy": 0.9660682818293571, + "num_tokens": 6014455.0, + "step": 2450 + }, + { + "entropy": 0.11858201997354627, + "epoch": 6.493827160493828, + "grad_norm": 3.611467123031616, + "learning_rate": 2.3645405907781953e-05, + "loss": 0.45053131103515626, + "mean_token_accuracy": 0.965420377254486, + "num_tokens": 6141229.0, + "step": 2500 + }, + { + "entropy": 0.12635288257151842, + "epoch": 6.623781676413255, + "grad_norm": 3.836108446121216, + "learning_rate": 2.213268314533456e-05, + "loss": 0.47164119720458986, + "mean_token_accuracy": 0.9634545907378197, + "num_tokens": 6257983.0, + "step": 2550 + }, + { + "entropy": 0.12443709814921021, + "epoch": 6.753736192332683, + "grad_norm": 3.9949610233306885, + "learning_rate": 2.0647990173680608e-05, + "loss": 0.4821424865722656, + "mean_token_accuracy": 0.9620693147182464, + "num_tokens": 6376198.0, + "step": 2600 + }, + { + "entropy": 0.11832262013107538, + "epoch": 6.883690708252112, + "grad_norm": 2.5650837421417236, + "learning_rate": 1.9194377665065605e-05, + "loss": 0.44591110229492187, + "mean_token_accuracy": 0.9656608006358147, + "num_tokens": 6502526.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.2215800589093795, + "eval_loss": 0.9339411854743958, + "eval_mean_token_accuracy": 0.8444493430165144, + "eval_num_tokens": 6613474.0, + "eval_runtime": 93.7158, + "eval_samples_per_second": 17.681, + "eval_steps_per_second": 2.219, + "step": 2695 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.346502637457326e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1493184d6d12d83f8ed718e072e05409f3dc4505 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3080/trainer_state.json @@ -0,0 +1,732 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 3080, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + }, + { + "entropy": 0.29751914612312413, + "epoch": 4.025990903183885, + "grad_norm": 3.8319175243377686, + "learning_rate": 5.340781994847969e-05, + "loss": 1.1474579620361327, + "mean_token_accuracy": 0.9113474434344613, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.22374473124742508, + "epoch": 4.155945419103314, + "grad_norm": 4.0295023918151855, + "learning_rate": 5.197913368976308e-05, + "loss": 0.8647676849365235, + "mean_token_accuracy": 0.9303225663304329, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2204760881140828, + "epoch": 4.2858999350227425, + "grad_norm": 2.7091262340545654, + "learning_rate": 5.0517150240410225e-05, + "loss": 0.8546311950683594, + "mean_token_accuracy": 0.9325515595078469, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.22379870273172855, + "epoch": 4.41585445094217, + "grad_norm": 3.676753044128418, + "learning_rate": 4.90248736102854e-05, + "loss": 0.8702528381347656, + "mean_token_accuracy": 0.9302561604976654, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.22574612841010094, + "epoch": 4.545808966861598, + "grad_norm": 3.4745290279388428, + "learning_rate": 4.750537005415305e-05, + "loss": 0.8742953491210937, + "mean_token_accuracy": 0.9304392230510712, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2232594309747219, + "epoch": 4.675763482781027, + "grad_norm": 2.898700475692749, + "learning_rate": 4.596176177129214e-05, + "loss": 0.8619278717041016, + "mean_token_accuracy": 0.930913093984127, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.22699439719319345, + "epoch": 4.805717998700455, + "grad_norm": 4.208059310913086, + "learning_rate": 4.4397220490158516e-05, + "loss": 0.8736541748046875, + "mean_token_accuracy": 0.9296354326605797, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.22327437780797482, + "epoch": 4.935672514619883, + "grad_norm": 4.770328998565674, + "learning_rate": 4.281496095127722e-05, + "loss": 0.8705735778808594, + "mean_token_accuracy": 0.9296216520667077, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.30851531530228943, + "eval_loss": 0.7023613452911377, + "eval_mean_token_accuracy": 0.8488265181390139, + "eval_num_tokens": 4723910.0, + "eval_runtime": 93.5227, + "eval_samples_per_second": 17.718, + "eval_steps_per_second": 2.224, + "step": 1925 + }, + { + "entropy": 0.1837335507848754, + "epoch": 5.064977257959714, + "grad_norm": 3.1876420974731445, + "learning_rate": 4.1218234301755803e-05, + "loss": 0.7019867706298828, + "mean_token_accuracy": 0.943129480484143, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1513645588979125, + "epoch": 5.1949317738791425, + "grad_norm": 5.2119059562683105, + "learning_rate": 3.961032141499117e-05, + "loss": 0.5882163619995118, + "mean_token_accuracy": 0.9523130711913109, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.1646271352469921, + "epoch": 5.32488628979857, + "grad_norm": 3.4650797843933105, + "learning_rate": 3.799452614929641e-05, + "loss": 0.6260359191894531, + "mean_token_accuracy": 0.9499588277935982, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.1620502556487918, + "epoch": 5.454840805717999, + "grad_norm": 4.383810997009277, + "learning_rate": 3.637416855929934e-05, + "loss": 0.619534568786621, + "mean_token_accuracy": 0.9506754752993584, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.16391102869063615, + "epoch": 5.584795321637427, + "grad_norm": 3.5354411602020264, + "learning_rate": 3.475257807406162e-05, + "loss": 0.631606559753418, + "mean_token_accuracy": 0.9495515289902687, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.15723124787211418, + "epoch": 5.714749837556855, + "grad_norm": 2.841092824935913, + "learning_rate": 3.313308665593597e-05, + "loss": 0.605863380432129, + "mean_token_accuracy": 0.9508947885036468, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.15846401400864124, + "epoch": 5.844704353476283, + "grad_norm": 3.759674549102783, + "learning_rate": 3.151902195421776e-05, + "loss": 0.6139367294311523, + "mean_token_accuracy": 0.9504388865828514, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.16854628551751374, + "epoch": 5.974658869395712, + "grad_norm": 3.469398260116577, + "learning_rate": 2.991370046765923e-05, + "loss": 0.6406163787841797, + "mean_token_accuracy": 0.9481290274858475, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.2671854439406441, + "eval_loss": 0.8231481909751892, + "eval_mean_token_accuracy": 0.8452930894608681, + "eval_num_tokens": 5668692.0, + "eval_runtime": 93.4211, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.226, + "step": 2310 + }, + { + "entropy": 0.12965927387227366, + "epoch": 6.1039636127355426, + "grad_norm": 4.258986473083496, + "learning_rate": 2.8320420729895018e-05, + "loss": 0.48968967437744143, + "mean_token_accuracy": 0.960972131197177, + "num_tokens": 5766129.0, + "step": 2350 + }, + { + "entropy": 0.1159947993606329, + "epoch": 6.23391812865497, + "grad_norm": 2.889965534210205, + "learning_rate": 2.6742456531781548e-05, + "loss": 0.43932266235351564, + "mean_token_accuracy": 0.9663794213533401, + "num_tokens": 5888746.0, + "step": 2400 + }, + { + "entropy": 0.11464667120948434, + "epoch": 6.363872644574399, + "grad_norm": 2.719203472137451, + "learning_rate": 2.518305019457679e-05, + "loss": 0.42842094421386717, + "mean_token_accuracy": 0.9660682818293571, + "num_tokens": 6014455.0, + "step": 2450 + }, + { + "entropy": 0.11858201997354627, + "epoch": 6.493827160493828, + "grad_norm": 3.611467123031616, + "learning_rate": 2.3645405907781953e-05, + "loss": 0.45053131103515626, + "mean_token_accuracy": 0.965420377254486, + "num_tokens": 6141229.0, + "step": 2500 + }, + { + "entropy": 0.12635288257151842, + "epoch": 6.623781676413255, + "grad_norm": 3.836108446121216, + "learning_rate": 2.213268314533456e-05, + "loss": 0.47164119720458986, + "mean_token_accuracy": 0.9634545907378197, + "num_tokens": 6257983.0, + "step": 2550 + }, + { + "entropy": 0.12443709814921021, + "epoch": 6.753736192332683, + "grad_norm": 3.9949610233306885, + "learning_rate": 2.0647990173680608e-05, + "loss": 0.4821424865722656, + "mean_token_accuracy": 0.9620693147182464, + "num_tokens": 6376198.0, + "step": 2600 + }, + { + "entropy": 0.11832262013107538, + "epoch": 6.883690708252112, + "grad_norm": 2.5650837421417236, + "learning_rate": 1.9194377665065605e-05, + "loss": 0.44591110229492187, + "mean_token_accuracy": 0.9656608006358147, + "num_tokens": 6502526.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.2215800589093795, + "eval_loss": 0.9339411854743958, + "eval_mean_token_accuracy": 0.8444493430165144, + "eval_num_tokens": 6613474.0, + "eval_runtime": 93.7158, + "eval_samples_per_second": 17.681, + "eval_steps_per_second": 2.219, + "step": 2695 + }, + { + "entropy": 0.11301927620442069, + "epoch": 7.012995451591943, + "grad_norm": 2.6045796871185303, + "learning_rate": 1.7774832429166907e-05, + "loss": 0.4257319259643555, + "mean_token_accuracy": 0.967041172274393, + "num_tokens": 6626464.0, + "step": 2700 + }, + { + "entropy": 0.09376694705337286, + "epoch": 7.142949967511371, + "grad_norm": 3.673069715499878, + "learning_rate": 1.639227127594797e-05, + "loss": 0.3493629837036133, + "mean_token_accuracy": 0.9737332627177239, + "num_tokens": 6746276.0, + "step": 2750 + }, + { + "entropy": 0.0937589898519218, + "epoch": 7.272904483430799, + "grad_norm": 1.284744381904602, + "learning_rate": 1.504953502234417e-05, + "loss": 0.3494458770751953, + "mean_token_accuracy": 0.9736128148436546, + "num_tokens": 6868131.0, + "step": 2800 + }, + { + "entropy": 0.09080767655745149, + "epoch": 7.402858999350228, + "grad_norm": 2.304025888442993, + "learning_rate": 1.3749382655095595e-05, + "loss": 0.3455091094970703, + "mean_token_accuracy": 0.9737611535191536, + "num_tokens": 6993803.0, + "step": 2850 + }, + { + "entropy": 0.09955240281298756, + "epoch": 7.532813515269655, + "grad_norm": 2.3498237133026123, + "learning_rate": 1.2494485661720107e-05, + "loss": 0.37081523895263674, + "mean_token_accuracy": 0.9723404514789581, + "num_tokens": 7113063.0, + "step": 2900 + }, + { + "entropy": 0.09484823819249869, + "epoch": 7.662768031189084, + "grad_norm": 2.8144359588623047, + "learning_rate": 1.1287422541275542e-05, + "loss": 0.3587993621826172, + "mean_token_accuracy": 0.9728687980771065, + "num_tokens": 7237174.0, + "step": 2950 + }, + { + "entropy": 0.09782332878559828, + "epoch": 7.792722547108512, + "grad_norm": 2.4676949977874756, + "learning_rate": 1.0130673506189897e-05, + "loss": 0.36967514038085936, + "mean_token_accuracy": 0.9725084081292152, + "num_tokens": 7357301.0, + "step": 3000 + }, + { + "entropy": 0.0982075690664351, + "epoch": 7.92267706302794, + "grad_norm": 2.4878036975860596, + "learning_rate": 9.026615386045925e-06, + "loss": 0.3717473602294922, + "mean_token_accuracy": 0.9727215927839279, + "num_tokens": 7482431.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.18732091820297334, + "eval_loss": 1.0530312061309814, + "eval_mean_token_accuracy": 0.8431636665302974, + "eval_num_tokens": 7558256.0, + "eval_runtime": 93.2972, + "eval_samples_per_second": 17.76, + "eval_steps_per_second": 2.229, + "step": 3080 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.681084793340478e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b7996630cd7f1990c06f39b7e12bbdc36332e21b --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3465/trainer_state.json @@ -0,0 +1,823 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 3465, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + }, + { + "entropy": 0.29751914612312413, + "epoch": 4.025990903183885, + "grad_norm": 3.8319175243377686, + "learning_rate": 5.340781994847969e-05, + "loss": 1.1474579620361327, + "mean_token_accuracy": 0.9113474434344613, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.22374473124742508, + "epoch": 4.155945419103314, + "grad_norm": 4.0295023918151855, + "learning_rate": 5.197913368976308e-05, + "loss": 0.8647676849365235, + "mean_token_accuracy": 0.9303225663304329, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2204760881140828, + "epoch": 4.2858999350227425, + "grad_norm": 2.7091262340545654, + "learning_rate": 5.0517150240410225e-05, + "loss": 0.8546311950683594, + "mean_token_accuracy": 0.9325515595078469, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.22379870273172855, + "epoch": 4.41585445094217, + "grad_norm": 3.676753044128418, + "learning_rate": 4.90248736102854e-05, + "loss": 0.8702528381347656, + "mean_token_accuracy": 0.9302561604976654, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.22574612841010094, + "epoch": 4.545808966861598, + "grad_norm": 3.4745290279388428, + "learning_rate": 4.750537005415305e-05, + "loss": 0.8742953491210937, + "mean_token_accuracy": 0.9304392230510712, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2232594309747219, + "epoch": 4.675763482781027, + "grad_norm": 2.898700475692749, + "learning_rate": 4.596176177129214e-05, + "loss": 0.8619278717041016, + "mean_token_accuracy": 0.930913093984127, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.22699439719319345, + "epoch": 4.805717998700455, + "grad_norm": 4.208059310913086, + "learning_rate": 4.4397220490158516e-05, + "loss": 0.8736541748046875, + "mean_token_accuracy": 0.9296354326605797, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.22327437780797482, + "epoch": 4.935672514619883, + "grad_norm": 4.770328998565674, + "learning_rate": 4.281496095127722e-05, + "loss": 0.8705735778808594, + "mean_token_accuracy": 0.9296216520667077, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.30851531530228943, + "eval_loss": 0.7023613452911377, + "eval_mean_token_accuracy": 0.8488265181390139, + "eval_num_tokens": 4723910.0, + "eval_runtime": 93.5227, + "eval_samples_per_second": 17.718, + "eval_steps_per_second": 2.224, + "step": 1925 + }, + { + "entropy": 0.1837335507848754, + "epoch": 5.064977257959714, + "grad_norm": 3.1876420974731445, + "learning_rate": 4.1218234301755803e-05, + "loss": 0.7019867706298828, + "mean_token_accuracy": 0.943129480484143, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1513645588979125, + "epoch": 5.1949317738791425, + "grad_norm": 5.2119059562683105, + "learning_rate": 3.961032141499117e-05, + "loss": 0.5882163619995118, + "mean_token_accuracy": 0.9523130711913109, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.1646271352469921, + "epoch": 5.32488628979857, + "grad_norm": 3.4650797843933105, + "learning_rate": 3.799452614929641e-05, + "loss": 0.6260359191894531, + "mean_token_accuracy": 0.9499588277935982, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.1620502556487918, + "epoch": 5.454840805717999, + "grad_norm": 4.383810997009277, + "learning_rate": 3.637416855929934e-05, + "loss": 0.619534568786621, + "mean_token_accuracy": 0.9506754752993584, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.16391102869063615, + "epoch": 5.584795321637427, + "grad_norm": 3.5354411602020264, + "learning_rate": 3.475257807406162e-05, + "loss": 0.631606559753418, + "mean_token_accuracy": 0.9495515289902687, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.15723124787211418, + "epoch": 5.714749837556855, + "grad_norm": 2.841092824935913, + "learning_rate": 3.313308665593597e-05, + "loss": 0.605863380432129, + "mean_token_accuracy": 0.9508947885036468, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.15846401400864124, + "epoch": 5.844704353476283, + "grad_norm": 3.759674549102783, + "learning_rate": 3.151902195421776e-05, + "loss": 0.6139367294311523, + "mean_token_accuracy": 0.9504388865828514, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.16854628551751374, + "epoch": 5.974658869395712, + "grad_norm": 3.469398260116577, + "learning_rate": 2.991370046765923e-05, + "loss": 0.6406163787841797, + "mean_token_accuracy": 0.9481290274858475, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.2671854439406441, + "eval_loss": 0.8231481909751892, + "eval_mean_token_accuracy": 0.8452930894608681, + "eval_num_tokens": 5668692.0, + "eval_runtime": 93.4211, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.226, + "step": 2310 + }, + { + "entropy": 0.12965927387227366, + "epoch": 6.1039636127355426, + "grad_norm": 4.258986473083496, + "learning_rate": 2.8320420729895018e-05, + "loss": 0.48968967437744143, + "mean_token_accuracy": 0.960972131197177, + "num_tokens": 5766129.0, + "step": 2350 + }, + { + "entropy": 0.1159947993606329, + "epoch": 6.23391812865497, + "grad_norm": 2.889965534210205, + "learning_rate": 2.6742456531781548e-05, + "loss": 0.43932266235351564, + "mean_token_accuracy": 0.9663794213533401, + "num_tokens": 5888746.0, + "step": 2400 + }, + { + "entropy": 0.11464667120948434, + "epoch": 6.363872644574399, + "grad_norm": 2.719203472137451, + "learning_rate": 2.518305019457679e-05, + "loss": 0.42842094421386717, + "mean_token_accuracy": 0.9660682818293571, + "num_tokens": 6014455.0, + "step": 2450 + }, + { + "entropy": 0.11858201997354627, + "epoch": 6.493827160493828, + "grad_norm": 3.611467123031616, + "learning_rate": 2.3645405907781953e-05, + "loss": 0.45053131103515626, + "mean_token_accuracy": 0.965420377254486, + "num_tokens": 6141229.0, + "step": 2500 + }, + { + "entropy": 0.12635288257151842, + "epoch": 6.623781676413255, + "grad_norm": 3.836108446121216, + "learning_rate": 2.213268314533456e-05, + "loss": 0.47164119720458986, + "mean_token_accuracy": 0.9634545907378197, + "num_tokens": 6257983.0, + "step": 2550 + }, + { + "entropy": 0.12443709814921021, + "epoch": 6.753736192332683, + "grad_norm": 3.9949610233306885, + "learning_rate": 2.0647990173680608e-05, + "loss": 0.4821424865722656, + "mean_token_accuracy": 0.9620693147182464, + "num_tokens": 6376198.0, + "step": 2600 + }, + { + "entropy": 0.11832262013107538, + "epoch": 6.883690708252112, + "grad_norm": 2.5650837421417236, + "learning_rate": 1.9194377665065605e-05, + "loss": 0.44591110229492187, + "mean_token_accuracy": 0.9656608006358147, + "num_tokens": 6502526.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.2215800589093795, + "eval_loss": 0.9339411854743958, + "eval_mean_token_accuracy": 0.8444493430165144, + "eval_num_tokens": 6613474.0, + "eval_runtime": 93.7158, + "eval_samples_per_second": 17.681, + "eval_steps_per_second": 2.219, + "step": 2695 + }, + { + "entropy": 0.11301927620442069, + "epoch": 7.012995451591943, + "grad_norm": 2.6045796871185303, + "learning_rate": 1.7774832429166907e-05, + "loss": 0.4257319259643555, + "mean_token_accuracy": 0.967041172274393, + "num_tokens": 6626464.0, + "step": 2700 + }, + { + "entropy": 0.09376694705337286, + "epoch": 7.142949967511371, + "grad_norm": 3.673069715499878, + "learning_rate": 1.639227127594797e-05, + "loss": 0.3493629837036133, + "mean_token_accuracy": 0.9737332627177239, + "num_tokens": 6746276.0, + "step": 2750 + }, + { + "entropy": 0.0937589898519218, + "epoch": 7.272904483430799, + "grad_norm": 1.284744381904602, + "learning_rate": 1.504953502234417e-05, + "loss": 0.3494458770751953, + "mean_token_accuracy": 0.9736128148436546, + "num_tokens": 6868131.0, + "step": 2800 + }, + { + "entropy": 0.09080767655745149, + "epoch": 7.402858999350228, + "grad_norm": 2.304025888442993, + "learning_rate": 1.3749382655095595e-05, + "loss": 0.3455091094970703, + "mean_token_accuracy": 0.9737611535191536, + "num_tokens": 6993803.0, + "step": 2850 + }, + { + "entropy": 0.09955240281298756, + "epoch": 7.532813515269655, + "grad_norm": 2.3498237133026123, + "learning_rate": 1.2494485661720107e-05, + "loss": 0.37081523895263674, + "mean_token_accuracy": 0.9723404514789581, + "num_tokens": 7113063.0, + "step": 2900 + }, + { + "entropy": 0.09484823819249869, + "epoch": 7.662768031189084, + "grad_norm": 2.8144359588623047, + "learning_rate": 1.1287422541275542e-05, + "loss": 0.3587993621826172, + "mean_token_accuracy": 0.9728687980771065, + "num_tokens": 7237174.0, + "step": 2950 + }, + { + "entropy": 0.09782332878559828, + "epoch": 7.792722547108512, + "grad_norm": 2.4676949977874756, + "learning_rate": 1.0130673506189897e-05, + "loss": 0.36967514038085936, + "mean_token_accuracy": 0.9725084081292152, + "num_tokens": 7357301.0, + "step": 3000 + }, + { + "entropy": 0.0982075690664351, + "epoch": 7.92267706302794, + "grad_norm": 2.4878036975860596, + "learning_rate": 9.026615386045925e-06, + "loss": 0.3717473602294922, + "mean_token_accuracy": 0.9727215927839279, + "num_tokens": 7482431.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.18732091820297334, + "eval_loss": 1.0530312061309814, + "eval_mean_token_accuracy": 0.8431636665302974, + "eval_num_tokens": 7558256.0, + "eval_runtime": 93.2972, + "eval_samples_per_second": 17.76, + "eval_steps_per_second": 2.229, + "step": 3080 + }, + { + "entropy": 0.09050090180419797, + "epoch": 8.05198180636777, + "grad_norm": 1.581737995147705, + "learning_rate": 7.977516743791511e-06, + "loss": 0.3330033493041992, + "mean_token_accuracy": 0.9748336343309987, + "num_tokens": 7607307.0, + "step": 3100 + }, + { + "entropy": 0.0836846032179892, + "epoch": 8.1819363222872, + "grad_norm": 2.2539267539978027, + "learning_rate": 6.9855332144109845e-06, + "loss": 0.3167022705078125, + "mean_token_accuracy": 0.9763787424564362, + "num_tokens": 7727840.0, + "step": 3150 + }, + { + "entropy": 0.08659086996689438, + "epoch": 8.311890838206628, + "grad_norm": 1.3999571800231934, + "learning_rate": 6.052703075635024e-06, + "loss": 0.31993478775024414, + "mean_token_accuracy": 0.9760957387089729, + "num_tokens": 7848468.0, + "step": 3200 + }, + { + "entropy": 0.08405340114608408, + "epoch": 8.441845354126055, + "grad_norm": 1.0389668941497803, + "learning_rate": 5.180943059790417e-06, + "loss": 0.31426584243774414, + "mean_token_accuracy": 0.9764248445630074, + "num_tokens": 7971324.0, + "step": 3250 + }, + { + "entropy": 0.09312817815691232, + "epoch": 8.571799870045485, + "grad_norm": 2.099010467529297, + "learning_rate": 4.372044415395074e-06, + "loss": 0.3411895370483398, + "mean_token_accuracy": 0.9753422957658767, + "num_tokens": 8086025.0, + "step": 3300 + }, + { + "entropy": 0.08220499983988702, + "epoch": 8.701754385964913, + "grad_norm": 2.398397445678711, + "learning_rate": 3.627669226590941e-06, + "loss": 0.3075200843811035, + "mean_token_accuracy": 0.9770644724369049, + "num_tokens": 8213441.0, + "step": 3350 + }, + { + "entropy": 0.0838418971374631, + "epoch": 8.83170890188434, + "grad_norm": 1.176540493965149, + "learning_rate": 2.949346997977317e-06, + "loss": 0.31322656631469725, + "mean_token_accuracy": 0.9763040187954902, + "num_tokens": 8340533.0, + "step": 3400 + }, + { + "entropy": 0.08329285142943263, + "epoch": 8.961663417803768, + "grad_norm": 1.3044288158416748, + "learning_rate": 2.338471511861953e-06, + "loss": 0.3066983985900879, + "mean_token_accuracy": 0.9766682273149491, + "num_tokens": 8468264.0, + "step": 3450 + }, + { + "epoch": 9.0, + "eval_entropy": 0.16551104488854224, + "eval_loss": 1.1869975328445435, + "eval_mean_token_accuracy": 0.841982166067912, + "eval_num_tokens": 8503038.0, + "eval_runtime": 93.2522, + "eval_samples_per_second": 17.769, + "eval_steps_per_second": 2.231, + "step": 3465 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.014939375394102e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d59a9fa2b0e0f923a6108f7fc1365504ca18b2dc --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-385/trainer_state.json @@ -0,0 +1,115 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 385, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.3383035994615795e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2917b7ffe501d1cac53c0b883891b440227747a9 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-3850/trainer_state.json @@ -0,0 +1,914 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 3850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + }, + { + "entropy": 0.4363510547271326, + "epoch": 2.077972709551657, + "grad_norm": 3.9445464611053467, + "learning_rate": 6.905755456738823e-05, + "loss": 1.701644744873047, + "mean_token_accuracy": 0.8784636774254804, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.3984112246334553, + "epoch": 2.207927225471085, + "grad_norm": 2.7381982803344727, + "learning_rate": 6.842901653130672e-05, + "loss": 1.5607098388671874, + "mean_token_accuracy": 0.8866209423542023, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.40140418589115145, + "epoch": 2.3378817413905133, + "grad_norm": 3.321972131729126, + "learning_rate": 6.773338091488962e-05, + "loss": 1.5787547302246094, + "mean_token_accuracy": 0.8854541593790054, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.3955576819181442, + "epoch": 2.4678362573099415, + "grad_norm": 2.413280725479126, + "learning_rate": 6.697207707512082e-05, + "loss": 1.5567779541015625, + "mean_token_accuracy": 0.886657263636589, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.3982538402080536, + "epoch": 2.5977907732293697, + "grad_norm": 2.769259452819824, + "learning_rate": 6.614666930073788e-05, + "loss": 1.5671014404296875, + "mean_token_accuracy": 0.8863470497727394, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4086438773572445, + "epoch": 2.727745289148798, + "grad_norm": 2.636345624923706, + "learning_rate": 6.525885359801053e-05, + "loss": 1.5918766784667968, + "mean_token_accuracy": 0.8857174924015999, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.39681060910224913, + "epoch": 2.857699805068226, + "grad_norm": 2.708190679550171, + "learning_rate": 6.431045420587239e-05, + "loss": 1.55162841796875, + "mean_token_accuracy": 0.8879095411300659, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.41047369830310343, + "epoch": 2.9876543209876543, + "grad_norm": 2.741119623184204, + "learning_rate": 6.330341984756691e-05, + "loss": 1.607012939453125, + "mean_token_accuracy": 0.886100817322731, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.43975070491433144, + "eval_loss": 0.5563390254974365, + "eval_mean_token_accuracy": 0.8540356571857746, + "eval_num_tokens": 2834346.0, + "eval_runtime": 93.6235, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.222, + "step": 1155 + }, + { + "entropy": 0.3157867716634693, + "epoch": 3.116959064327485, + "grad_norm": 2.720665693283081, + "learning_rate": 6.223981972650901e-05, + "loss": 1.2099590301513672, + "mean_token_accuracy": 0.90753990621423, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.30736712127923965, + "epoch": 3.246913580246914, + "grad_norm": 2.7423577308654785, + "learning_rate": 6.112183927459039e-05, + "loss": 1.1804743194580078, + "mean_token_accuracy": 0.9091711294651031, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.31040274240076543, + "epoch": 3.3768680961663415, + "grad_norm": 2.4974067211151123, + "learning_rate": 5.995177566166415e-05, + "loss": 1.205712127685547, + "mean_token_accuracy": 0.9075283539295197, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3108955673873425, + "epoch": 3.50682261208577, + "grad_norm": 5.503927707672119, + "learning_rate": 5.873203307543615e-05, + "loss": 1.2054142761230469, + "mean_token_accuracy": 0.9072180911898613, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3123178996890783, + "epoch": 3.636777128005198, + "grad_norm": 3.6903016567230225, + "learning_rate": 5.746511778146122e-05, + "loss": 1.2007347869873046, + "mean_token_accuracy": 0.9080208915472031, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.30725445054471495, + "epoch": 3.7667316439246266, + "grad_norm": 3.651270627975464, + "learning_rate": 5.615363297339524e-05, + "loss": 1.2028478240966798, + "mean_token_accuracy": 0.9079937645792961, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3093853303790092, + "epoch": 3.8966861598440543, + "grad_norm": 3.8038971424102783, + "learning_rate": 5.480027342408394e-05, + "loss": 1.2103264617919922, + "mean_token_accuracy": 0.9064547646045685, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.3824075059248851, + "eval_loss": 0.596433162689209, + "eval_mean_token_accuracy": 0.8518055370793893, + "eval_num_tokens": 3779128.0, + "eval_runtime": 93.8991, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.215, + "step": 1540 + }, + { + "entropy": 0.29751914612312413, + "epoch": 4.025990903183885, + "grad_norm": 3.8319175243377686, + "learning_rate": 5.340781994847969e-05, + "loss": 1.1474579620361327, + "mean_token_accuracy": 0.9113474434344613, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.22374473124742508, + "epoch": 4.155945419103314, + "grad_norm": 4.0295023918151855, + "learning_rate": 5.197913368976308e-05, + "loss": 0.8647676849365235, + "mean_token_accuracy": 0.9303225663304329, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2204760881140828, + "epoch": 4.2858999350227425, + "grad_norm": 2.7091262340545654, + "learning_rate": 5.0517150240410225e-05, + "loss": 0.8546311950683594, + "mean_token_accuracy": 0.9325515595078469, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.22379870273172855, + "epoch": 4.41585445094217, + "grad_norm": 3.676753044128418, + "learning_rate": 4.90248736102854e-05, + "loss": 0.8702528381347656, + "mean_token_accuracy": 0.9302561604976654, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.22574612841010094, + "epoch": 4.545808966861598, + "grad_norm": 3.4745290279388428, + "learning_rate": 4.750537005415305e-05, + "loss": 0.8742953491210937, + "mean_token_accuracy": 0.9304392230510712, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2232594309747219, + "epoch": 4.675763482781027, + "grad_norm": 2.898700475692749, + "learning_rate": 4.596176177129214e-05, + "loss": 0.8619278717041016, + "mean_token_accuracy": 0.930913093984127, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.22699439719319345, + "epoch": 4.805717998700455, + "grad_norm": 4.208059310913086, + "learning_rate": 4.4397220490158516e-05, + "loss": 0.8736541748046875, + "mean_token_accuracy": 0.9296354326605797, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.22327437780797482, + "epoch": 4.935672514619883, + "grad_norm": 4.770328998565674, + "learning_rate": 4.281496095127722e-05, + "loss": 0.8705735778808594, + "mean_token_accuracy": 0.9296216520667077, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.30851531530228943, + "eval_loss": 0.7023613452911377, + "eval_mean_token_accuracy": 0.8488265181390139, + "eval_num_tokens": 4723910.0, + "eval_runtime": 93.5227, + "eval_samples_per_second": 17.718, + "eval_steps_per_second": 2.224, + "step": 1925 + }, + { + "entropy": 0.1837335507848754, + "epoch": 5.064977257959714, + "grad_norm": 3.1876420974731445, + "learning_rate": 4.1218234301755803e-05, + "loss": 0.7019867706298828, + "mean_token_accuracy": 0.943129480484143, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1513645588979125, + "epoch": 5.1949317738791425, + "grad_norm": 5.2119059562683105, + "learning_rate": 3.961032141499117e-05, + "loss": 0.5882163619995118, + "mean_token_accuracy": 0.9523130711913109, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.1646271352469921, + "epoch": 5.32488628979857, + "grad_norm": 3.4650797843933105, + "learning_rate": 3.799452614929641e-05, + "loss": 0.6260359191894531, + "mean_token_accuracy": 0.9499588277935982, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.1620502556487918, + "epoch": 5.454840805717999, + "grad_norm": 4.383810997009277, + "learning_rate": 3.637416855929934e-05, + "loss": 0.619534568786621, + "mean_token_accuracy": 0.9506754752993584, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.16391102869063615, + "epoch": 5.584795321637427, + "grad_norm": 3.5354411602020264, + "learning_rate": 3.475257807406162e-05, + "loss": 0.631606559753418, + "mean_token_accuracy": 0.9495515289902687, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.15723124787211418, + "epoch": 5.714749837556855, + "grad_norm": 2.841092824935913, + "learning_rate": 3.313308665593597e-05, + "loss": 0.605863380432129, + "mean_token_accuracy": 0.9508947885036468, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.15846401400864124, + "epoch": 5.844704353476283, + "grad_norm": 3.759674549102783, + "learning_rate": 3.151902195421776e-05, + "loss": 0.6139367294311523, + "mean_token_accuracy": 0.9504388865828514, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.16854628551751374, + "epoch": 5.974658869395712, + "grad_norm": 3.469398260116577, + "learning_rate": 2.991370046765923e-05, + "loss": 0.6406163787841797, + "mean_token_accuracy": 0.9481290274858475, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.2671854439406441, + "eval_loss": 0.8231481909751892, + "eval_mean_token_accuracy": 0.8452930894608681, + "eval_num_tokens": 5668692.0, + "eval_runtime": 93.4211, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.226, + "step": 2310 + }, + { + "entropy": 0.12965927387227366, + "epoch": 6.1039636127355426, + "grad_norm": 4.258986473083496, + "learning_rate": 2.8320420729895018e-05, + "loss": 0.48968967437744143, + "mean_token_accuracy": 0.960972131197177, + "num_tokens": 5766129.0, + "step": 2350 + }, + { + "entropy": 0.1159947993606329, + "epoch": 6.23391812865497, + "grad_norm": 2.889965534210205, + "learning_rate": 2.6742456531781548e-05, + "loss": 0.43932266235351564, + "mean_token_accuracy": 0.9663794213533401, + "num_tokens": 5888746.0, + "step": 2400 + }, + { + "entropy": 0.11464667120948434, + "epoch": 6.363872644574399, + "grad_norm": 2.719203472137451, + "learning_rate": 2.518305019457679e-05, + "loss": 0.42842094421386717, + "mean_token_accuracy": 0.9660682818293571, + "num_tokens": 6014455.0, + "step": 2450 + }, + { + "entropy": 0.11858201997354627, + "epoch": 6.493827160493828, + "grad_norm": 3.611467123031616, + "learning_rate": 2.3645405907781953e-05, + "loss": 0.45053131103515626, + "mean_token_accuracy": 0.965420377254486, + "num_tokens": 6141229.0, + "step": 2500 + }, + { + "entropy": 0.12635288257151842, + "epoch": 6.623781676413255, + "grad_norm": 3.836108446121216, + "learning_rate": 2.213268314533456e-05, + "loss": 0.47164119720458986, + "mean_token_accuracy": 0.9634545907378197, + "num_tokens": 6257983.0, + "step": 2550 + }, + { + "entropy": 0.12443709814921021, + "epoch": 6.753736192332683, + "grad_norm": 3.9949610233306885, + "learning_rate": 2.0647990173680608e-05, + "loss": 0.4821424865722656, + "mean_token_accuracy": 0.9620693147182464, + "num_tokens": 6376198.0, + "step": 2600 + }, + { + "entropy": 0.11832262013107538, + "epoch": 6.883690708252112, + "grad_norm": 2.5650837421417236, + "learning_rate": 1.9194377665065605e-05, + "loss": 0.44591110229492187, + "mean_token_accuracy": 0.9656608006358147, + "num_tokens": 6502526.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.2215800589093795, + "eval_loss": 0.9339411854743958, + "eval_mean_token_accuracy": 0.8444493430165144, + "eval_num_tokens": 6613474.0, + "eval_runtime": 93.7158, + "eval_samples_per_second": 17.681, + "eval_steps_per_second": 2.219, + "step": 2695 + }, + { + "entropy": 0.11301927620442069, + "epoch": 7.012995451591943, + "grad_norm": 2.6045796871185303, + "learning_rate": 1.7774832429166907e-05, + "loss": 0.4257319259643555, + "mean_token_accuracy": 0.967041172274393, + "num_tokens": 6626464.0, + "step": 2700 + }, + { + "entropy": 0.09376694705337286, + "epoch": 7.142949967511371, + "grad_norm": 3.673069715499878, + "learning_rate": 1.639227127594797e-05, + "loss": 0.3493629837036133, + "mean_token_accuracy": 0.9737332627177239, + "num_tokens": 6746276.0, + "step": 2750 + }, + { + "entropy": 0.0937589898519218, + "epoch": 7.272904483430799, + "grad_norm": 1.284744381904602, + "learning_rate": 1.504953502234417e-05, + "loss": 0.3494458770751953, + "mean_token_accuracy": 0.9736128148436546, + "num_tokens": 6868131.0, + "step": 2800 + }, + { + "entropy": 0.09080767655745149, + "epoch": 7.402858999350228, + "grad_norm": 2.304025888442993, + "learning_rate": 1.3749382655095595e-05, + "loss": 0.3455091094970703, + "mean_token_accuracy": 0.9737611535191536, + "num_tokens": 6993803.0, + "step": 2850 + }, + { + "entropy": 0.09955240281298756, + "epoch": 7.532813515269655, + "grad_norm": 2.3498237133026123, + "learning_rate": 1.2494485661720107e-05, + "loss": 0.37081523895263674, + "mean_token_accuracy": 0.9723404514789581, + "num_tokens": 7113063.0, + "step": 2900 + }, + { + "entropy": 0.09484823819249869, + "epoch": 7.662768031189084, + "grad_norm": 2.8144359588623047, + "learning_rate": 1.1287422541275542e-05, + "loss": 0.3587993621826172, + "mean_token_accuracy": 0.9728687980771065, + "num_tokens": 7237174.0, + "step": 2950 + }, + { + "entropy": 0.09782332878559828, + "epoch": 7.792722547108512, + "grad_norm": 2.4676949977874756, + "learning_rate": 1.0130673506189897e-05, + "loss": 0.36967514038085936, + "mean_token_accuracy": 0.9725084081292152, + "num_tokens": 7357301.0, + "step": 3000 + }, + { + "entropy": 0.0982075690664351, + "epoch": 7.92267706302794, + "grad_norm": 2.4878036975860596, + "learning_rate": 9.026615386045925e-06, + "loss": 0.3717473602294922, + "mean_token_accuracy": 0.9727215927839279, + "num_tokens": 7482431.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.18732091820297334, + "eval_loss": 1.0530312061309814, + "eval_mean_token_accuracy": 0.8431636665302974, + "eval_num_tokens": 7558256.0, + "eval_runtime": 93.2972, + "eval_samples_per_second": 17.76, + "eval_steps_per_second": 2.229, + "step": 3080 + }, + { + "entropy": 0.09050090180419797, + "epoch": 8.05198180636777, + "grad_norm": 1.581737995147705, + "learning_rate": 7.977516743791511e-06, + "loss": 0.3330033493041992, + "mean_token_accuracy": 0.9748336343309987, + "num_tokens": 7607307.0, + "step": 3100 + }, + { + "entropy": 0.0836846032179892, + "epoch": 8.1819363222872, + "grad_norm": 2.2539267539978027, + "learning_rate": 6.9855332144109845e-06, + "loss": 0.3167022705078125, + "mean_token_accuracy": 0.9763787424564362, + "num_tokens": 7727840.0, + "step": 3150 + }, + { + "entropy": 0.08659086996689438, + "epoch": 8.311890838206628, + "grad_norm": 1.3999571800231934, + "learning_rate": 6.052703075635024e-06, + "loss": 0.31993478775024414, + "mean_token_accuracy": 0.9760957387089729, + "num_tokens": 7848468.0, + "step": 3200 + }, + { + "entropy": 0.08405340114608408, + "epoch": 8.441845354126055, + "grad_norm": 1.0389668941497803, + "learning_rate": 5.180943059790417e-06, + "loss": 0.31426584243774414, + "mean_token_accuracy": 0.9764248445630074, + "num_tokens": 7971324.0, + "step": 3250 + }, + { + "entropy": 0.09312817815691232, + "epoch": 8.571799870045485, + "grad_norm": 2.099010467529297, + "learning_rate": 4.372044415395074e-06, + "loss": 0.3411895370483398, + "mean_token_accuracy": 0.9753422957658767, + "num_tokens": 8086025.0, + "step": 3300 + }, + { + "entropy": 0.08220499983988702, + "epoch": 8.701754385964913, + "grad_norm": 2.398397445678711, + "learning_rate": 3.627669226590941e-06, + "loss": 0.3075200843811035, + "mean_token_accuracy": 0.9770644724369049, + "num_tokens": 8213441.0, + "step": 3350 + }, + { + "entropy": 0.0838418971374631, + "epoch": 8.83170890188434, + "grad_norm": 1.176540493965149, + "learning_rate": 2.949346997977317e-06, + "loss": 0.31322656631469725, + "mean_token_accuracy": 0.9763040187954902, + "num_tokens": 8340533.0, + "step": 3400 + }, + { + "entropy": 0.08329285142943263, + "epoch": 8.961663417803768, + "grad_norm": 1.3044288158416748, + "learning_rate": 2.338471511861953e-06, + "loss": 0.3066983985900879, + "mean_token_accuracy": 0.9766682273149491, + "num_tokens": 8468264.0, + "step": 3450 + }, + { + "epoch": 9.0, + "eval_entropy": 0.16551104488854224, + "eval_loss": 1.1869975328445435, + "eval_mean_token_accuracy": 0.841982166067912, + "eval_num_tokens": 8503038.0, + "eval_runtime": 93.2522, + "eval_samples_per_second": 17.769, + "eval_steps_per_second": 2.231, + "step": 3465 + }, + { + "entropy": 0.07780357189102088, + "epoch": 9.0909681611436, + "grad_norm": 1.4474356174468994, + "learning_rate": 1.7962979643874071e-06, + "loss": 0.28267601013183596, + "mean_token_accuracy": 0.9786822903695418, + "num_tokens": 8590639.0, + "step": 3500 + }, + { + "entropy": 0.07642922284081578, + "epoch": 9.220922677063028, + "grad_norm": 1.1418468952178955, + "learning_rate": 1.3239403864173652e-06, + "loss": 0.28310337066650393, + "mean_token_accuracy": 0.9780851683020592, + "num_tokens": 8717381.0, + "step": 3550 + }, + { + "entropy": 0.08197338008321822, + "epoch": 9.350877192982455, + "grad_norm": 1.8400335311889648, + "learning_rate": 9.223693544821523e-07, + "loss": 0.3019949722290039, + "mean_token_accuracy": 0.9771037146449089, + "num_tokens": 8842405.0, + "step": 3600 + }, + { + "entropy": 0.0859423121996224, + "epoch": 9.480831708901885, + "grad_norm": 0.8777729868888855, + "learning_rate": 5.924099964870779e-07, + "loss": 0.31166439056396483, + "mean_token_accuracy": 0.9766475519537926, + "num_tokens": 8962503.0, + "step": 3650 + }, + { + "entropy": 0.08218811591155827, + "epoch": 9.610786224821313, + "grad_norm": 1.7506035566329956, + "learning_rate": 3.347402962811754e-07, + "loss": 0.30174663543701175, + "mean_token_accuracy": 0.9769721442461013, + "num_tokens": 9080522.0, + "step": 3700 + }, + { + "entropy": 0.08101004351861775, + "epoch": 9.74074074074074, + "grad_norm": 1.0798615217208862, + "learning_rate": 1.498897005702096e-07, + "loss": 0.29830049514770507, + "mean_token_accuracy": 0.97772067040205, + "num_tokens": 9202377.0, + "step": 3750 + }, + { + "entropy": 0.07980371115729212, + "epoch": 9.870695256660168, + "grad_norm": 1.1633610725402832, + "learning_rate": 3.82380310362807e-08, + "loss": 0.29861265182495117, + "mean_token_accuracy": 0.9780822241306305, + "num_tokens": 9323644.0, + "step": 3800 + }, + { + "entropy": 0.0809034345081853, + "epoch": 10.0, + "grad_norm": 2.1446292400360107, + "learning_rate": 1.470389938404346e-11, + "loss": 0.29428337097167967, + "mean_token_accuracy": 0.9779826738127512, + "num_tokens": 9447820.0, + "step": 3850 + }, + { + "epoch": 10.0, + "eval_entropy": 0.15474601140102515, + "eval_loss": 1.2656937837600708, + "eval_mean_token_accuracy": 0.8407013264413064, + "eval_num_tokens": 9447820.0, + "eval_runtime": 93.4529, + "eval_samples_per_second": 17.731, + "eval_steps_per_second": 2.226, + "step": 3850 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.3488848358810015e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..278e09723cd4cd09ce8951dd20ae126a89306eb0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.08758287981835962, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fb79dd1349d058afa114b026a60772fea47a95f3 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test1/checkpoint-770/trainer_state.json @@ -0,0 +1,206 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 770, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.4003965058922767, + "epoch": 0.1299545159194282, + "grad_norm": 6.85750150680542, + "learning_rate": 9.106146360493497e-06, + "loss": 5.61093017578125, + "mean_token_accuracy": 0.7215492802858353, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.6890088641643524, + "epoch": 0.2599090318388564, + "grad_norm": 4.675379753112793, + "learning_rate": 1.8398132442629718e-05, + "loss": 2.6707302856445314, + "mean_token_accuracy": 0.831542606651783, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.6121356458961964, + "epoch": 0.3898635477582846, + "grad_norm": 4.716741561889648, + "learning_rate": 2.7690118524765944e-05, + "loss": 2.3951219177246093, + "mean_token_accuracy": 0.8436042138934136, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5764433582127094, + "epoch": 0.5198180636777128, + "grad_norm": 6.604654312133789, + "learning_rate": 3.698210460690216e-05, + "loss": 2.233437194824219, + "mean_token_accuracy": 0.8491340172290802, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5845598143339157, + "epoch": 0.649772579597141, + "grad_norm": 142.01007080078125, + "learning_rate": 4.627409068903838e-05, + "loss": 2.5906100463867188, + "mean_token_accuracy": 0.8439102494716644, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5845901258289814, + "epoch": 0.7797270955165692, + "grad_norm": 3.459049940109253, + "learning_rate": 5.556607677117461e-05, + "loss": 2.3185415649414063, + "mean_token_accuracy": 0.8499650385975838, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5498752015829086, + "epoch": 0.9096816114359974, + "grad_norm": 3.871177911758423, + "learning_rate": 6.485806285331082e-05, + "loss": 2.170651702880859, + "mean_token_accuracy": 0.8542942544817924, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.578240382270171, + "eval_loss": 0.5769997239112854, + "eval_mean_token_accuracy": 0.8430235242614379, + "eval_num_tokens": 944782.0, + "eval_runtime": 93.5764, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 385 + }, + { + "entropy": 0.5479793678875544, + "epoch": 1.0389863547758285, + "grad_norm": 5.7315897941589355, + "learning_rate": 7.154541090666746e-05, + "loss": 2.099454345703125, + "mean_token_accuracy": 0.8584242837512912, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5112442304193974, + "epoch": 1.1689408706952567, + "grad_norm": 3.4090652465820312, + "learning_rate": 7.148808255369052e-05, + "loss": 1.984275360107422, + "mean_token_accuracy": 0.8656484684348107, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5141718898713589, + "epoch": 1.2988953866146848, + "grad_norm": 2.981410503387451, + "learning_rate": 7.135737100578312e-05, + "loss": 1.9902142333984374, + "mean_token_accuracy": 0.8631115168333053, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.51056716889143, + "epoch": 1.428849902534113, + "grad_norm": 2.724004030227661, + "learning_rate": 7.115354484244213e-05, + "loss": 1.9883427429199219, + "mean_token_accuracy": 0.8649928227066994, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.49679399371147154, + "epoch": 1.5588044184535412, + "grad_norm": 4.024278163909912, + "learning_rate": 7.08770228753891e-05, + "loss": 1.9636073303222656, + "mean_token_accuracy": 0.8658123564720154, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.48449113249778747, + "epoch": 1.6887589343729694, + "grad_norm": 6.3375043869018555, + "learning_rate": 7.052837328801699e-05, + "loss": 1.9092594909667968, + "mean_token_accuracy": 0.8693041172623635, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.49986490070819856, + "epoch": 1.8187134502923976, + "grad_norm": 2.8352301120758057, + "learning_rate": 7.010831246791564e-05, + "loss": 1.976134033203125, + "mean_token_accuracy": 0.865630615055561, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.4923377679288387, + "epoch": 1.9486679662118258, + "grad_norm": 3.2214341163635254, + "learning_rate": 6.96177035348746e-05, + "loss": 1.9487001037597655, + "mean_token_accuracy": 0.8670864734053612, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.494693323969841, + "eval_loss": 0.5423477292060852, + "eval_mean_token_accuracy": 0.8519259931949469, + "eval_num_tokens": 1889564.0, + "eval_runtime": 93.577, + "eval_samples_per_second": 17.707, + "eval_steps_per_second": 2.223, + "step": 770 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.709053895661266e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.00985279561940916, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b9eda3b2195959c768f537d8b84feb57058c4820 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3465/trainer_state.json @@ -0,0 +1,823 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 3465, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.353258643448353, + "epoch": 0.1299545159194282, + "grad_norm": 3.010725975036621, + "learning_rate": 4.8475852375026876e-05, + "loss": 5.475971069335937, + "mean_token_accuracy": 0.7263440760970116, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.649170914888382, + "epoch": 0.2599090318388564, + "grad_norm": 1.9099390506744385, + "learning_rate": 9.794100785974817e-05, + "loss": 2.55168701171875, + "mean_token_accuracy": 0.8364580717682838, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.5930788792669773, + "epoch": 0.3898635477582846, + "grad_norm": 2.1239051818847656, + "learning_rate": 0.0001474061633444695, + "loss": 2.3440716552734373, + "mean_token_accuracy": 0.8452290838956833, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5564522063732147, + "epoch": 0.5198180636777128, + "grad_norm": 411.71807861328125, + "learning_rate": 0.00019687131882919077, + "loss": 2.2838446044921876, + "mean_token_accuracy": 0.8498487600684166, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5539529167115689, + "epoch": 0.649772579597141, + "grad_norm": 2.1969902515411377, + "learning_rate": 0.0002463364743139121, + "loss": 2.675394287109375, + "mean_token_accuracy": 0.8430694487690925, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5719467167556286, + "epoch": 0.7797270955165692, + "grad_norm": 1.98796546459198, + "learning_rate": 0.00029580162979863343, + "loss": 2.2434300231933593, + "mean_token_accuracy": 0.851241897046566, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5502805083990097, + "epoch": 0.9096816114359974, + "grad_norm": 2.0211398601531982, + "learning_rate": 0.0003452667852833547, + "loss": 2.1729367065429686, + "mean_token_accuracy": 0.8554597494006156, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.5580813550891784, + "eval_loss": 0.5830356478691101, + "eval_mean_token_accuracy": 0.8432669037809739, + "eval_num_tokens": 944782.0, + "eval_runtime": 90.3664, + "eval_samples_per_second": 18.336, + "eval_steps_per_second": 2.302, + "step": 385 + }, + { + "entropy": 0.5498402091725987, + "epoch": 1.0389863547758285, + "grad_norm": 3.8034188747406006, + "learning_rate": 0.000380866355527619, + "loss": 2.113946990966797, + "mean_token_accuracy": 0.8578129452676629, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5182110907137394, + "epoch": 1.1689408706952567, + "grad_norm": 2.7830824851989746, + "learning_rate": 0.0003805611725593471, + "loss": 1.9833453369140626, + "mean_token_accuracy": 0.8656822636723518, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5260789206624031, + "epoch": 1.2988953866146848, + "grad_norm": 1.7993361949920654, + "learning_rate": 0.0003798653399371568, + "loss": 2.006897430419922, + "mean_token_accuracy": 0.8631055191159248, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.5327546864748001, + "epoch": 1.428849902534113, + "grad_norm": 1.7606678009033203, + "learning_rate": 0.0003787802874228295, + "loss": 2.020283050537109, + "mean_token_accuracy": 0.8638329988718033, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.5285360223054886, + "epoch": 1.5588044184535412, + "grad_norm": 4.76006555557251, + "learning_rate": 0.00037730824452755275, + "loss": 1.9987391662597656, + "mean_token_accuracy": 0.8644696187973022, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.5134804363548756, + "epoch": 1.6887589343729694, + "grad_norm": 1.8447264432907104, + "learning_rate": 0.000375452235930833, + "loss": 1.9669386291503905, + "mean_token_accuracy": 0.8659948265552521, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.5371069309115409, + "epoch": 1.8187134502923976, + "grad_norm": 1.6537392139434814, + "learning_rate": 0.00037321607526553675, + "loss": 2.0411550903320315, + "mean_token_accuracy": 0.8624854254722595, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.5270501750707627, + "epoch": 1.9486679662118258, + "grad_norm": 2.6990911960601807, + "learning_rate": 0.00037060435728183, + "loss": 2.015792236328125, + "mean_token_accuracy": 0.8631013777852058, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5477195472384875, + "eval_loss": 0.5585702657699585, + "eval_mean_token_accuracy": 0.8486175815073344, + "eval_num_tokens": 1889564.0, + "eval_runtime": 90.2194, + "eval_samples_per_second": 18.366, + "eval_steps_per_second": 2.305, + "step": 770 + }, + { + "entropy": 0.4782189565088282, + "epoch": 2.077972709551657, + "grad_norm": 2.041952610015869, + "learning_rate": 0.0003676224484061175, + "loss": 1.7843829345703126, + "mean_token_accuracy": 0.8739750406250881, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.4443667846918106, + "epoch": 2.207927225471085, + "grad_norm": 16.27313804626465, + "learning_rate": 0.00036427647571437996, + "loss": 1.6559255981445313, + "mean_token_accuracy": 0.8808386281132699, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.44861202985048293, + "epoch": 2.3378817413905133, + "grad_norm": 1.648870587348938, + "learning_rate": 0.0003605733143425679, + "loss": 1.677943878173828, + "mean_token_accuracy": 0.879555520415306, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.4568726105988026, + "epoch": 2.4678362573099415, + "grad_norm": 1.7573126554489136, + "learning_rate": 0.00035652057335991866, + "loss": 1.6760734558105468, + "mean_token_accuracy": 0.8791913360357284, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.44863338857889173, + "epoch": 2.5977907732293697, + "grad_norm": 1.8639047145843506, + "learning_rate": 0.00035212658013422465, + "loss": 1.6799411010742187, + "mean_token_accuracy": 0.8790675121545791, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4585830120742321, + "epoch": 2.727745289148798, + "grad_norm": 1.9825985431671143, + "learning_rate": 0.0003474003632211781, + "loss": 1.7172026062011718, + "mean_token_accuracy": 0.8782495930790901, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.45422692246735097, + "epoch": 2.857699805068226, + "grad_norm": 1.7149962186813354, + "learning_rate": 0.00034235163381294995, + "loss": 1.679084014892578, + "mean_token_accuracy": 0.8795321774482727, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.47297614574432373, + "epoch": 2.9876543209876543, + "grad_norm": 1.7435617446899414, + "learning_rate": 0.0003369907657841221, + "loss": 1.7386201477050782, + "mean_token_accuracy": 0.8779115182161331, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5031588454372607, + "eval_loss": 0.5551120638847351, + "eval_mean_token_accuracy": 0.8531603300227568, + "eval_num_tokens": 2834346.0, + "eval_runtime": 90.2397, + "eval_samples_per_second": 18.362, + "eval_steps_per_second": 2.305, + "step": 1155 + }, + { + "entropy": 0.37655152073457615, + "epoch": 3.116959064327485, + "grad_norm": 1.504384160041809, + "learning_rate": 0.0003313287743759729, + "loss": 1.3653451538085937, + "mean_token_accuracy": 0.8971295344769655, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.37069276951253416, + "epoch": 3.246913580246914, + "grad_norm": 1.9665946960449219, + "learning_rate": 0.0003253772935629151, + "loss": 1.3458108520507812, + "mean_token_accuracy": 0.8982205548882485, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.37295883789658546, + "epoch": 3.3768680961663415, + "grad_norm": 1.7501362562179565, + "learning_rate": 0.00031914855214759165, + "loss": 1.357562255859375, + "mean_token_accuracy": 0.8977113124728203, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3805788069963455, + "epoch": 3.50682261208577, + "grad_norm": 1.7277154922485352, + "learning_rate": 0.00031265534863374894, + "loss": 1.3735618591308594, + "mean_token_accuracy": 0.8962143072485924, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3840580120682716, + "epoch": 3.636777128005198, + "grad_norm": 2.2338802814483643, + "learning_rate": 0.0003059110249285165, + "loss": 1.3903216552734374, + "mean_token_accuracy": 0.8958476388454437, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.37621145449578763, + "epoch": 3.7667316439246266, + "grad_norm": 1.9029661417007446, + "learning_rate": 0.00029892943892812944, + "loss": 1.3776657104492187, + "mean_token_accuracy": 0.8964926180243492, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3784803995490074, + "epoch": 3.8966861598440543, + "grad_norm": 2.089708089828491, + "learning_rate": 0.00029172493604342163, + "loss": 1.3816807556152344, + "mean_token_accuracy": 0.8962833172082901, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4351254403591156, + "eval_loss": 0.5814722180366516, + "eval_mean_token_accuracy": 0.8530604747625498, + "eval_num_tokens": 3779128.0, + "eval_runtime": 90.2232, + "eval_samples_per_second": 18.366, + "eval_steps_per_second": 2.305, + "step": 1540 + }, + { + "entropy": 0.36326556409423677, + "epoch": 4.025990903183885, + "grad_norm": 2.1354947090148926, + "learning_rate": 0.0002843123197235993, + "loss": 1.3295362854003907, + "mean_token_accuracy": 0.8993093811686913, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.2879397062957287, + "epoch": 4.155945419103314, + "grad_norm": 2.201097011566162, + "learning_rate": 0.0002767068210388601, + "loss": 1.0272974395751953, + "mean_token_accuracy": 0.9182627710700035, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2848948486149311, + "epoch": 4.2858999350227425, + "grad_norm": 2.01479172706604, + "learning_rate": 0.000268924067384358, + "loss": 1.0278727722167968, + "mean_token_accuracy": 0.9194766515493393, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.2940504560619593, + "epoch": 4.41585445094217, + "grad_norm": 2.0893027782440186, + "learning_rate": 0.00026098005036982003, + "loss": 1.0586751556396485, + "mean_token_accuracy": 0.9167885810136795, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.293505182415247, + "epoch": 4.545808966861598, + "grad_norm": 1.6346389055252075, + "learning_rate": 0.0002528910929607928, + "loss": 1.0669570922851563, + "mean_token_accuracy": 0.9160876458883286, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2898535231500864, + "epoch": 4.675763482781027, + "grad_norm": 1.6645033359527588, + "learning_rate": 0.0002446738159390364, + "loss": 1.0582612609863282, + "mean_token_accuracy": 0.9177632886171341, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.2842763290554285, + "epoch": 4.805717998700455, + "grad_norm": 2.4594268798828125, + "learning_rate": 0.0002363451037509798, + "loss": 1.0467537689208983, + "mean_token_accuracy": 0.9177608361840248, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.284430123642087, + "epoch": 4.935672514619883, + "grad_norm": 2.1724514961242676, + "learning_rate": 0.00022792206981441223, + "loss": 1.0753899383544923, + "mean_token_accuracy": 0.915192686021328, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3632780872285366, + "eval_loss": 0.6438126564025879, + "eval_mean_token_accuracy": 0.8511462942338907, + "eval_num_tokens": 4723910.0, + "eval_runtime": 90.1846, + "eval_samples_per_second": 18.373, + "eval_steps_per_second": 2.306, + "step": 1925 + }, + { + "entropy": 0.23515464736139355, + "epoch": 5.064977257959714, + "grad_norm": 1.651587724685669, + "learning_rate": 0.00021942202135469513, + "loss": 0.8597064971923828, + "mean_token_accuracy": 0.9324622603517082, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1958953895419836, + "epoch": 5.1949317738791425, + "grad_norm": 1.923292636871338, + "learning_rate": 0.0002108624238427481, + "loss": 0.7188112640380859, + "mean_token_accuracy": 0.9416415295004845, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.21068542070686816, + "epoch": 5.32488628979857, + "grad_norm": 2.299356460571289, + "learning_rate": 0.0002022608651078804, + "loss": 0.7712985229492187, + "mean_token_accuracy": 0.9386440163850784, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.21234643168747425, + "epoch": 5.454840805717999, + "grad_norm": 2.2119295597076416, + "learning_rate": 0.00019363501919920608, + "loss": 0.7650181579589844, + "mean_token_accuracy": 0.938471505343914, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.21658269092440605, + "epoch": 5.584795321637427, + "grad_norm": 1.5394288301467896, + "learning_rate": 0.00018500261006989887, + "loss": 0.7784209442138672, + "mean_token_accuracy": 0.9371598136425018, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.2045296123996377, + "epoch": 5.714749837556855, + "grad_norm": 1.913680076599121, + "learning_rate": 0.00017638137515890763, + "loss": 0.7638166046142578, + "mean_token_accuracy": 0.9378301629424095, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.20917976945638656, + "epoch": 5.844704353476283, + "grad_norm": 2.0847299098968506, + "learning_rate": 0.00016778902894496063, + "loss": 0.7631703186035156, + "mean_token_accuracy": 0.9387557968497277, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.22262076318264007, + "epoch": 5.974658869395712, + "grad_norm": 2.1597352027893066, + "learning_rate": 0.0001592432265477485, + "loss": 0.798133773803711, + "mean_token_accuracy": 0.936034984588623, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.31502799331568754, + "eval_loss": 0.7417300343513489, + "eval_mean_token_accuracy": 0.8477253922476218, + "eval_num_tokens": 5668692.0, + "eval_runtime": 90.4252, + "eval_samples_per_second": 18.325, + "eval_steps_per_second": 2.3, + "step": 2310 + }, + { + "entropy": 0.16796037876725795, + "epoch": 6.1039636127355426, + "grad_norm": 2.2228569984436035, + "learning_rate": 0.00015076152745107442, + "loss": 0.5835284805297851, + "mean_token_accuracy": 0.9529892874123463, + "num_tokens": 5766129.0, + "step": 2350 + }, + { + "entropy": 0.14919219192117453, + "epoch": 6.23391812865497, + "grad_norm": 1.408840298652649, + "learning_rate": 0.00014236135942251215, + "loss": 0.5310631561279296, + "mean_token_accuracy": 0.9586454060673714, + "num_tokens": 5888746.0, + "step": 2400 + }, + { + "entropy": 0.1499051059409976, + "epoch": 6.363872644574399, + "grad_norm": 1.8611102104187012, + "learning_rate": 0.00013405998270370849, + "loss": 0.5127810668945313, + "mean_token_accuracy": 0.9591325157880783, + "num_tokens": 6014455.0, + "step": 2450 + }, + { + "entropy": 0.15334193099290133, + "epoch": 6.493827160493828, + "grad_norm": 1.6051015853881836, + "learning_rate": 0.00012587445454490892, + "loss": 0.5349758529663086, + "mean_token_accuracy": 0.9574431091547012, + "num_tokens": 6141229.0, + "step": 2500 + }, + { + "entropy": 0.15982334002852439, + "epoch": 6.623781676413255, + "grad_norm": 3.7065205574035645, + "learning_rate": 0.00011782159415658008, + "loss": 0.5602469253540039, + "mean_token_accuracy": 0.9555372184515, + "num_tokens": 6257983.0, + "step": 2550 + }, + { + "entropy": 0.16072992872446776, + "epoch": 6.753736192332683, + "grad_norm": 2.282320976257324, + "learning_rate": 0.00010991794815014401, + "loss": 0.5657939910888672, + "mean_token_accuracy": 0.9550630164146423, + "num_tokens": 6376198.0, + "step": 2600 + }, + { + "entropy": 0.1512781011685729, + "epoch": 6.883690708252112, + "grad_norm": 1.3716893196105957, + "learning_rate": 0.00010217975653883603, + "loss": 0.5340792465209961, + "mean_token_accuracy": 0.9578188157081604, + "num_tokens": 6502526.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.2444461930829745, + "eval_loss": 0.8798949718475342, + "eval_mean_token_accuracy": 0.8457763839799625, + "eval_num_tokens": 6613474.0, + "eval_runtime": 90.2868, + "eval_samples_per_second": 18.353, + "eval_steps_per_second": 2.304, + "step": 2695 + }, + { + "entropy": 0.1444593005668578, + "epoch": 7.012995451591943, + "grad_norm": 1.0965569019317627, + "learning_rate": 9.462291936854386e-05, + "loss": 0.511833839416504, + "mean_token_accuracy": 0.9595773016388093, + "num_tokens": 6626464.0, + "step": 2700 + }, + { + "entropy": 0.10985541097819805, + "epoch": 7.142949967511371, + "grad_norm": 1.8079149723052979, + "learning_rate": 8.726296404719584e-05, + "loss": 0.3876673126220703, + "mean_token_accuracy": 0.9704919803142548, + "num_tokens": 6746276.0, + "step": 2750 + }, + { + "entropy": 0.11304264679551125, + "epoch": 7.272904483430799, + "grad_norm": 1.5228444337844849, + "learning_rate": 8.01150134398253e-05, + "loss": 0.39335052490234373, + "mean_token_accuracy": 0.9695766788721084, + "num_tokens": 6868131.0, + "step": 2800 + }, + { + "entropy": 0.11066193280741572, + "epoch": 7.402858999350228, + "grad_norm": 2.265174388885498, + "learning_rate": 7.319375479487112e-05, + "loss": 0.38289966583251955, + "mean_token_accuracy": 0.9707033503055572, + "num_tokens": 6993803.0, + "step": 2850 + }, + { + "entropy": 0.12022399662062526, + "epoch": 7.532813515269655, + "grad_norm": 1.0657345056533813, + "learning_rate": 6.65134095655596e-05, + "loss": 0.4089087677001953, + "mean_token_accuracy": 0.9689779531955719, + "num_tokens": 7113063.0, + "step": 2900 + }, + { + "entropy": 0.11429863104596734, + "epoch": 7.662768031189084, + "grad_norm": 1.3440358638763428, + "learning_rate": 6.008770418837973e-05, + "loss": 0.3935198593139648, + "mean_token_accuracy": 0.9698223957419395, + "num_tokens": 7237174.0, + "step": 2950 + }, + { + "entropy": 0.11748226622119545, + "epoch": 7.792722547108512, + "grad_norm": 1.4607034921646118, + "learning_rate": 5.3929841878693804e-05, + "loss": 0.40399799346923826, + "mean_token_accuracy": 0.9695871344208717, + "num_tokens": 7357301.0, + "step": 3000 + }, + { + "entropy": 0.11790506653487683, + "epoch": 7.92267706302794, + "grad_norm": 1.4574708938598633, + "learning_rate": 4.805247550143646e-05, + "loss": 0.4049314880371094, + "mean_token_accuracy": 0.9693469110131264, + "num_tokens": 7482431.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.2104659411483086, + "eval_loss": 0.9939886927604675, + "eval_mean_token_accuracy": 0.8444042455118436, + "eval_num_tokens": 7558256.0, + "eval_runtime": 90.3118, + "eval_samples_per_second": 18.348, + "eval_steps_per_second": 2.303, + "step": 3080 + }, + { + "entropy": 0.10608276399086468, + "epoch": 8.05198180636777, + "grad_norm": 1.4720594882965088, + "learning_rate": 4.246768157264548e-05, + "loss": 0.3582034683227539, + "mean_token_accuracy": 0.9727947966537284, + "num_tokens": 7607307.0, + "step": 3100 + }, + { + "entropy": 0.09448420397937297, + "epoch": 8.1819363222872, + "grad_norm": 1.006718397140503, + "learning_rate": 3.718693544524604e-05, + "loss": 0.3269464874267578, + "mean_token_accuracy": 0.976178829073906, + "num_tokens": 7727840.0, + "step": 3150 + }, + { + "entropy": 0.09565175730735063, + "epoch": 8.311890838206628, + "grad_norm": 1.0114370584487915, + "learning_rate": 3.222108773007395e-05, + "loss": 0.330229606628418, + "mean_token_accuracy": 0.974904423058033, + "num_tokens": 7848468.0, + "step": 3200 + }, + { + "entropy": 0.0942081324569881, + "epoch": 8.441845354126055, + "grad_norm": 0.8164042234420776, + "learning_rate": 2.7580342000587992e-05, + "loss": 0.32308143615722656, + "mean_token_accuracy": 0.9755518987774849, + "num_tokens": 7971324.0, + "step": 3250 + }, + { + "entropy": 0.10181596595793962, + "epoch": 8.571799870045485, + "grad_norm": 1.7562603950500488, + "learning_rate": 2.327423382708144e-05, + "loss": 0.3492561340332031, + "mean_token_accuracy": 0.9740999150276184, + "num_tokens": 8086025.0, + "step": 3300 + }, + { + "entropy": 0.09124232700094581, + "epoch": 8.701754385964913, + "grad_norm": 1.117050051689148, + "learning_rate": 1.9311611183473242e-05, + "loss": 0.31594392776489255, + "mean_token_accuracy": 0.9763092172145843, + "num_tokens": 8213441.0, + "step": 3350 + }, + { + "entropy": 0.0920115345157683, + "epoch": 8.83170890188434, + "grad_norm": 0.9090991616249084, + "learning_rate": 1.5700616266937413e-05, + "loss": 0.32107589721679686, + "mean_token_accuracy": 0.9758272641897201, + "num_tokens": 8340533.0, + "step": 3400 + }, + { + "entropy": 0.09023899069055914, + "epoch": 8.961663417803768, + "grad_norm": 0.7739766240119934, + "learning_rate": 1.244866876772696e-05, + "loss": 0.3140911674499512, + "mean_token_accuracy": 0.9761302083730697, + "num_tokens": 8468264.0, + "step": 3450 + }, + { + "epoch": 9.0, + "eval_entropy": 0.176344332141945, + "eval_loss": 1.1650298833847046, + "eval_mean_token_accuracy": 0.84236626756879, + "eval_num_tokens": 8503038.0, + "eval_runtime": 90.2504, + "eval_samples_per_second": 18.36, + "eval_steps_per_second": 2.305, + "step": 3465 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9784575411164186e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.00985279561940916, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8d59671155c053f17d4974819c30fbeacf4401f0 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-385/trainer_state.json @@ -0,0 +1,115 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 385, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.353258643448353, + "epoch": 0.1299545159194282, + "grad_norm": 3.010725975036621, + "learning_rate": 4.8475852375026876e-05, + "loss": 5.475971069335937, + "mean_token_accuracy": 0.7263440760970116, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.649170914888382, + "epoch": 0.2599090318388564, + "grad_norm": 1.9099390506744385, + "learning_rate": 9.794100785974817e-05, + "loss": 2.55168701171875, + "mean_token_accuracy": 0.8364580717682838, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.5930788792669773, + "epoch": 0.3898635477582846, + "grad_norm": 2.1239051818847656, + "learning_rate": 0.0001474061633444695, + "loss": 2.3440716552734373, + "mean_token_accuracy": 0.8452290838956833, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5564522063732147, + "epoch": 0.5198180636777128, + "grad_norm": 411.71807861328125, + "learning_rate": 0.00019687131882919077, + "loss": 2.2838446044921876, + "mean_token_accuracy": 0.8498487600684166, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5539529167115689, + "epoch": 0.649772579597141, + "grad_norm": 2.1969902515411377, + "learning_rate": 0.0002463364743139121, + "loss": 2.675394287109375, + "mean_token_accuracy": 0.8430694487690925, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5719467167556286, + "epoch": 0.7797270955165692, + "grad_norm": 1.98796546459198, + "learning_rate": 0.00029580162979863343, + "loss": 2.2434300231933593, + "mean_token_accuracy": 0.851241897046566, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5502805083990097, + "epoch": 0.9096816114359974, + "grad_norm": 2.0211398601531982, + "learning_rate": 0.0003452667852833547, + "loss": 2.1729367065429686, + "mean_token_accuracy": 0.8554597494006156, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.5580813550891784, + "eval_loss": 0.5830356478691101, + "eval_mean_token_accuracy": 0.8432669037809739, + "eval_num_tokens": 944782.0, + "eval_runtime": 90.3664, + "eval_samples_per_second": 18.336, + "eval_steps_per_second": 2.302, + "step": 385 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.29790894354309e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.00985279561940916, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f278ca0ffb8910d5e95feaca5a469202e0338a7b --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-3850/trainer_state.json @@ -0,0 +1,914 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 3850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.353258643448353, + "epoch": 0.1299545159194282, + "grad_norm": 3.010725975036621, + "learning_rate": 4.8475852375026876e-05, + "loss": 5.475971069335937, + "mean_token_accuracy": 0.7263440760970116, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.649170914888382, + "epoch": 0.2599090318388564, + "grad_norm": 1.9099390506744385, + "learning_rate": 9.794100785974817e-05, + "loss": 2.55168701171875, + "mean_token_accuracy": 0.8364580717682838, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.5930788792669773, + "epoch": 0.3898635477582846, + "grad_norm": 2.1239051818847656, + "learning_rate": 0.0001474061633444695, + "loss": 2.3440716552734373, + "mean_token_accuracy": 0.8452290838956833, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5564522063732147, + "epoch": 0.5198180636777128, + "grad_norm": 411.71807861328125, + "learning_rate": 0.00019687131882919077, + "loss": 2.2838446044921876, + "mean_token_accuracy": 0.8498487600684166, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5539529167115689, + "epoch": 0.649772579597141, + "grad_norm": 2.1969902515411377, + "learning_rate": 0.0002463364743139121, + "loss": 2.675394287109375, + "mean_token_accuracy": 0.8430694487690925, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5719467167556286, + "epoch": 0.7797270955165692, + "grad_norm": 1.98796546459198, + "learning_rate": 0.00029580162979863343, + "loss": 2.2434300231933593, + "mean_token_accuracy": 0.851241897046566, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5502805083990097, + "epoch": 0.9096816114359974, + "grad_norm": 2.0211398601531982, + "learning_rate": 0.0003452667852833547, + "loss": 2.1729367065429686, + "mean_token_accuracy": 0.8554597494006156, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.5580813550891784, + "eval_loss": 0.5830356478691101, + "eval_mean_token_accuracy": 0.8432669037809739, + "eval_num_tokens": 944782.0, + "eval_runtime": 90.3664, + "eval_samples_per_second": 18.336, + "eval_steps_per_second": 2.302, + "step": 385 + }, + { + "entropy": 0.5498402091725987, + "epoch": 1.0389863547758285, + "grad_norm": 3.8034188747406006, + "learning_rate": 0.000380866355527619, + "loss": 2.113946990966797, + "mean_token_accuracy": 0.8578129452676629, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5182110907137394, + "epoch": 1.1689408706952567, + "grad_norm": 2.7830824851989746, + "learning_rate": 0.0003805611725593471, + "loss": 1.9833453369140626, + "mean_token_accuracy": 0.8656822636723518, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5260789206624031, + "epoch": 1.2988953866146848, + "grad_norm": 1.7993361949920654, + "learning_rate": 0.0003798653399371568, + "loss": 2.006897430419922, + "mean_token_accuracy": 0.8631055191159248, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.5327546864748001, + "epoch": 1.428849902534113, + "grad_norm": 1.7606678009033203, + "learning_rate": 0.0003787802874228295, + "loss": 2.020283050537109, + "mean_token_accuracy": 0.8638329988718033, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.5285360223054886, + "epoch": 1.5588044184535412, + "grad_norm": 4.76006555557251, + "learning_rate": 0.00037730824452755275, + "loss": 1.9987391662597656, + "mean_token_accuracy": 0.8644696187973022, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.5134804363548756, + "epoch": 1.6887589343729694, + "grad_norm": 1.8447264432907104, + "learning_rate": 0.000375452235930833, + "loss": 1.9669386291503905, + "mean_token_accuracy": 0.8659948265552521, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.5371069309115409, + "epoch": 1.8187134502923976, + "grad_norm": 1.6537392139434814, + "learning_rate": 0.00037321607526553675, + "loss": 2.0411550903320315, + "mean_token_accuracy": 0.8624854254722595, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.5270501750707627, + "epoch": 1.9486679662118258, + "grad_norm": 2.6990911960601807, + "learning_rate": 0.00037060435728183, + "loss": 2.015792236328125, + "mean_token_accuracy": 0.8631013777852058, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5477195472384875, + "eval_loss": 0.5585702657699585, + "eval_mean_token_accuracy": 0.8486175815073344, + "eval_num_tokens": 1889564.0, + "eval_runtime": 90.2194, + "eval_samples_per_second": 18.366, + "eval_steps_per_second": 2.305, + "step": 770 + }, + { + "entropy": 0.4782189565088282, + "epoch": 2.077972709551657, + "grad_norm": 2.041952610015869, + "learning_rate": 0.0003676224484061175, + "loss": 1.7843829345703126, + "mean_token_accuracy": 0.8739750406250881, + "num_tokens": 1959778.0, + "step": 800 + }, + { + "entropy": 0.4443667846918106, + "epoch": 2.207927225471085, + "grad_norm": 16.27313804626465, + "learning_rate": 0.00036427647571437996, + "loss": 1.6559255981445313, + "mean_token_accuracy": 0.8808386281132699, + "num_tokens": 2087384.0, + "step": 850 + }, + { + "entropy": 0.44861202985048293, + "epoch": 2.3378817413905133, + "grad_norm": 1.648870587348938, + "learning_rate": 0.0003605733143425679, + "loss": 1.677943878173828, + "mean_token_accuracy": 0.879555520415306, + "num_tokens": 2211962.0, + "step": 900 + }, + { + "entropy": 0.4568726105988026, + "epoch": 2.4678362573099415, + "grad_norm": 1.7573126554489136, + "learning_rate": 0.00035652057335991866, + "loss": 1.6760734558105468, + "mean_token_accuracy": 0.8791913360357284, + "num_tokens": 2334838.0, + "step": 950 + }, + { + "entropy": 0.44863338857889173, + "epoch": 2.5977907732293697, + "grad_norm": 1.8639047145843506, + "learning_rate": 0.00035212658013422465, + "loss": 1.6799411010742187, + "mean_token_accuracy": 0.8790675121545791, + "num_tokens": 2461732.0, + "step": 1000 + }, + { + "entropy": 0.4585830120742321, + "epoch": 2.727745289148798, + "grad_norm": 1.9825985431671143, + "learning_rate": 0.0003474003632211781, + "loss": 1.7172026062011718, + "mean_token_accuracy": 0.8782495930790901, + "num_tokens": 2580026.0, + "step": 1050 + }, + { + "entropy": 0.45422692246735097, + "epoch": 2.857699805068226, + "grad_norm": 1.7149962186813354, + "learning_rate": 0.00034235163381294995, + "loss": 1.679084014892578, + "mean_token_accuracy": 0.8795321774482727, + "num_tokens": 2705600.0, + "step": 1100 + }, + { + "entropy": 0.47297614574432373, + "epoch": 2.9876543209876543, + "grad_norm": 1.7435617446899414, + "learning_rate": 0.0003369907657841221, + "loss": 1.7386201477050782, + "mean_token_accuracy": 0.8779115182161331, + "num_tokens": 2822808.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5031588454372607, + "eval_loss": 0.5551120638847351, + "eval_mean_token_accuracy": 0.8531603300227568, + "eval_num_tokens": 2834346.0, + "eval_runtime": 90.2397, + "eval_samples_per_second": 18.362, + "eval_steps_per_second": 2.305, + "step": 1155 + }, + { + "entropy": 0.37655152073457615, + "epoch": 3.116959064327485, + "grad_norm": 1.504384160041809, + "learning_rate": 0.0003313287743759729, + "loss": 1.3653451538085937, + "mean_token_accuracy": 0.8971295344769655, + "num_tokens": 2939773.0, + "step": 1200 + }, + { + "entropy": 0.37069276951253416, + "epoch": 3.246913580246914, + "grad_norm": 1.9665946960449219, + "learning_rate": 0.0003253772935629151, + "loss": 1.3458108520507812, + "mean_token_accuracy": 0.8982205548882485, + "num_tokens": 3063617.0, + "step": 1250 + }, + { + "entropy": 0.37295883789658546, + "epoch": 3.3768680961663415, + "grad_norm": 1.7501362562179565, + "learning_rate": 0.00031914855214759165, + "loss": 1.357562255859375, + "mean_token_accuracy": 0.8977113124728203, + "num_tokens": 3189800.0, + "step": 1300 + }, + { + "entropy": 0.3805788069963455, + "epoch": 3.50682261208577, + "grad_norm": 1.7277154922485352, + "learning_rate": 0.00031265534863374894, + "loss": 1.3735618591308594, + "mean_token_accuracy": 0.8962143072485924, + "num_tokens": 3311908.0, + "step": 1350 + }, + { + "entropy": 0.3840580120682716, + "epoch": 3.636777128005198, + "grad_norm": 2.2338802814483643, + "learning_rate": 0.0003059110249285165, + "loss": 1.3903216552734374, + "mean_token_accuracy": 0.8958476388454437, + "num_tokens": 3432934.0, + "step": 1400 + }, + { + "entropy": 0.37621145449578763, + "epoch": 3.7667316439246266, + "grad_norm": 1.9029661417007446, + "learning_rate": 0.00029892943892812944, + "loss": 1.3776657104492187, + "mean_token_accuracy": 0.8964926180243492, + "num_tokens": 3561408.0, + "step": 1450 + }, + { + "entropy": 0.3784803995490074, + "epoch": 3.8966861598440543, + "grad_norm": 2.089708089828491, + "learning_rate": 0.00029172493604342163, + "loss": 1.3816807556152344, + "mean_token_accuracy": 0.8962833172082901, + "num_tokens": 3684624.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4351254403591156, + "eval_loss": 0.5814722180366516, + "eval_mean_token_accuracy": 0.8530604747625498, + "eval_num_tokens": 3779128.0, + "eval_runtime": 90.2232, + "eval_samples_per_second": 18.366, + "eval_steps_per_second": 2.305, + "step": 1540 + }, + { + "entropy": 0.36326556409423677, + "epoch": 4.025990903183885, + "grad_norm": 2.1354947090148926, + "learning_rate": 0.0002843123197235993, + "loss": 1.3295362854003907, + "mean_token_accuracy": 0.8993093811686913, + "num_tokens": 3804993.0, + "step": 1550 + }, + { + "entropy": 0.2879397062957287, + "epoch": 4.155945419103314, + "grad_norm": 2.201097011566162, + "learning_rate": 0.0002767068210388601, + "loss": 1.0272974395751953, + "mean_token_accuracy": 0.9182627710700035, + "num_tokens": 3928162.0, + "step": 1600 + }, + { + "entropy": 0.2848948486149311, + "epoch": 4.2858999350227425, + "grad_norm": 2.01479172706604, + "learning_rate": 0.000268924067384358, + "loss": 1.0278727722167968, + "mean_token_accuracy": 0.9194766515493393, + "num_tokens": 4049012.0, + "step": 1650 + }, + { + "entropy": 0.2940504560619593, + "epoch": 4.41585445094217, + "grad_norm": 2.0893027782440186, + "learning_rate": 0.00026098005036982003, + "loss": 1.0586751556396485, + "mean_token_accuracy": 0.9167885810136795, + "num_tokens": 4167845.0, + "step": 1700 + }, + { + "entropy": 0.293505182415247, + "epoch": 4.545808966861598, + "grad_norm": 1.6346389055252075, + "learning_rate": 0.0002528910929607928, + "loss": 1.0669570922851563, + "mean_token_accuracy": 0.9160876458883286, + "num_tokens": 4287505.0, + "step": 1750 + }, + { + "entropy": 0.2898535231500864, + "epoch": 4.675763482781027, + "grad_norm": 1.6645033359527588, + "learning_rate": 0.0002446738159390364, + "loss": 1.0582612609863282, + "mean_token_accuracy": 0.9177632886171341, + "num_tokens": 4412221.0, + "step": 1800 + }, + { + "entropy": 0.2842763290554285, + "epoch": 4.805717998700455, + "grad_norm": 2.4594268798828125, + "learning_rate": 0.0002363451037509798, + "loss": 1.0467537689208983, + "mean_token_accuracy": 0.9177608361840248, + "num_tokens": 4537178.0, + "step": 1850 + }, + { + "entropy": 0.284430123642087, + "epoch": 4.935672514619883, + "grad_norm": 2.1724514961242676, + "learning_rate": 0.00022792206981441223, + "loss": 1.0753899383544923, + "mean_token_accuracy": 0.915192686021328, + "num_tokens": 4664196.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3632780872285366, + "eval_loss": 0.6438126564025879, + "eval_mean_token_accuracy": 0.8511462942338907, + "eval_num_tokens": 4723910.0, + "eval_runtime": 90.1846, + "eval_samples_per_second": 18.373, + "eval_steps_per_second": 2.306, + "step": 1925 + }, + { + "entropy": 0.23515464736139355, + "epoch": 5.064977257959714, + "grad_norm": 1.651587724685669, + "learning_rate": 0.00021942202135469513, + "loss": 0.8597064971923828, + "mean_token_accuracy": 0.9324622603517082, + "num_tokens": 4789568.0, + "step": 1950 + }, + { + "entropy": 0.1958953895419836, + "epoch": 5.1949317738791425, + "grad_norm": 1.923292636871338, + "learning_rate": 0.0002108624238427481, + "loss": 0.7188112640380859, + "mean_token_accuracy": 0.9416415295004845, + "num_tokens": 4913407.0, + "step": 2000 + }, + { + "entropy": 0.21068542070686816, + "epoch": 5.32488628979857, + "grad_norm": 2.299356460571289, + "learning_rate": 0.0002022608651078804, + "loss": 0.7712985229492187, + "mean_token_accuracy": 0.9386440163850784, + "num_tokens": 5032951.0, + "step": 2050 + }, + { + "entropy": 0.21234643168747425, + "epoch": 5.454840805717999, + "grad_norm": 2.2119295597076416, + "learning_rate": 0.00019363501919920608, + "loss": 0.7650181579589844, + "mean_token_accuracy": 0.938471505343914, + "num_tokens": 5156908.0, + "step": 2100 + }, + { + "entropy": 0.21658269092440605, + "epoch": 5.584795321637427, + "grad_norm": 1.5394288301467896, + "learning_rate": 0.00018500261006989887, + "loss": 0.7784209442138672, + "mean_token_accuracy": 0.9371598136425018, + "num_tokens": 5276087.0, + "step": 2150 + }, + { + "entropy": 0.2045296123996377, + "epoch": 5.714749837556855, + "grad_norm": 1.913680076599121, + "learning_rate": 0.00017638137515890763, + "loss": 0.7638166046142578, + "mean_token_accuracy": 0.9378301629424095, + "num_tokens": 5398787.0, + "step": 2200 + }, + { + "entropy": 0.20917976945638656, + "epoch": 5.844704353476283, + "grad_norm": 2.0847299098968506, + "learning_rate": 0.00016778902894496063, + "loss": 0.7631703186035156, + "mean_token_accuracy": 0.9387557968497277, + "num_tokens": 5522332.0, + "step": 2250 + }, + { + "entropy": 0.22262076318264007, + "epoch": 5.974658869395712, + "grad_norm": 2.1597352027893066, + "learning_rate": 0.0001592432265477485, + "loss": 0.798133773803711, + "mean_token_accuracy": 0.936034984588623, + "num_tokens": 5642361.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.31502799331568754, + "eval_loss": 0.7417300343513489, + "eval_mean_token_accuracy": 0.8477253922476218, + "eval_num_tokens": 5668692.0, + "eval_runtime": 90.4252, + "eval_samples_per_second": 18.325, + "eval_steps_per_second": 2.3, + "step": 2310 + }, + { + "entropy": 0.16796037876725795, + "epoch": 6.1039636127355426, + "grad_norm": 2.2228569984436035, + "learning_rate": 0.00015076152745107442, + "loss": 0.5835284805297851, + "mean_token_accuracy": 0.9529892874123463, + "num_tokens": 5766129.0, + "step": 2350 + }, + { + "entropy": 0.14919219192117453, + "epoch": 6.23391812865497, + "grad_norm": 1.408840298652649, + "learning_rate": 0.00014236135942251215, + "loss": 0.5310631561279296, + "mean_token_accuracy": 0.9586454060673714, + "num_tokens": 5888746.0, + "step": 2400 + }, + { + "entropy": 0.1499051059409976, + "epoch": 6.363872644574399, + "grad_norm": 1.8611102104187012, + "learning_rate": 0.00013405998270370849, + "loss": 0.5127810668945313, + "mean_token_accuracy": 0.9591325157880783, + "num_tokens": 6014455.0, + "step": 2450 + }, + { + "entropy": 0.15334193099290133, + "epoch": 6.493827160493828, + "grad_norm": 1.6051015853881836, + "learning_rate": 0.00012587445454490892, + "loss": 0.5349758529663086, + "mean_token_accuracy": 0.9574431091547012, + "num_tokens": 6141229.0, + "step": 2500 + }, + { + "entropy": 0.15982334002852439, + "epoch": 6.623781676413255, + "grad_norm": 3.7065205574035645, + "learning_rate": 0.00011782159415658008, + "loss": 0.5602469253540039, + "mean_token_accuracy": 0.9555372184515, + "num_tokens": 6257983.0, + "step": 2550 + }, + { + "entropy": 0.16072992872446776, + "epoch": 6.753736192332683, + "grad_norm": 2.282320976257324, + "learning_rate": 0.00010991794815014401, + "loss": 0.5657939910888672, + "mean_token_accuracy": 0.9550630164146423, + "num_tokens": 6376198.0, + "step": 2600 + }, + { + "entropy": 0.1512781011685729, + "epoch": 6.883690708252112, + "grad_norm": 1.3716893196105957, + "learning_rate": 0.00010217975653883603, + "loss": 0.5340792465209961, + "mean_token_accuracy": 0.9578188157081604, + "num_tokens": 6502526.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.2444461930829745, + "eval_loss": 0.8798949718475342, + "eval_mean_token_accuracy": 0.8457763839799625, + "eval_num_tokens": 6613474.0, + "eval_runtime": 90.2868, + "eval_samples_per_second": 18.353, + "eval_steps_per_second": 2.304, + "step": 2695 + }, + { + "entropy": 0.1444593005668578, + "epoch": 7.012995451591943, + "grad_norm": 1.0965569019317627, + "learning_rate": 9.462291936854386e-05, + "loss": 0.511833839416504, + "mean_token_accuracy": 0.9595773016388093, + "num_tokens": 6626464.0, + "step": 2700 + }, + { + "entropy": 0.10985541097819805, + "epoch": 7.142949967511371, + "grad_norm": 1.8079149723052979, + "learning_rate": 8.726296404719584e-05, + "loss": 0.3876673126220703, + "mean_token_accuracy": 0.9704919803142548, + "num_tokens": 6746276.0, + "step": 2750 + }, + { + "entropy": 0.11304264679551125, + "epoch": 7.272904483430799, + "grad_norm": 1.5228444337844849, + "learning_rate": 8.01150134398253e-05, + "loss": 0.39335052490234373, + "mean_token_accuracy": 0.9695766788721084, + "num_tokens": 6868131.0, + "step": 2800 + }, + { + "entropy": 0.11066193280741572, + "epoch": 7.402858999350228, + "grad_norm": 2.265174388885498, + "learning_rate": 7.319375479487112e-05, + "loss": 0.38289966583251955, + "mean_token_accuracy": 0.9707033503055572, + "num_tokens": 6993803.0, + "step": 2850 + }, + { + "entropy": 0.12022399662062526, + "epoch": 7.532813515269655, + "grad_norm": 1.0657345056533813, + "learning_rate": 6.65134095655596e-05, + "loss": 0.4089087677001953, + "mean_token_accuracy": 0.9689779531955719, + "num_tokens": 7113063.0, + "step": 2900 + }, + { + "entropy": 0.11429863104596734, + "epoch": 7.662768031189084, + "grad_norm": 1.3440358638763428, + "learning_rate": 6.008770418837973e-05, + "loss": 0.3935198593139648, + "mean_token_accuracy": 0.9698223957419395, + "num_tokens": 7237174.0, + "step": 2950 + }, + { + "entropy": 0.11748226622119545, + "epoch": 7.792722547108512, + "grad_norm": 1.4607034921646118, + "learning_rate": 5.3929841878693804e-05, + "loss": 0.40399799346923826, + "mean_token_accuracy": 0.9695871344208717, + "num_tokens": 7357301.0, + "step": 3000 + }, + { + "entropy": 0.11790506653487683, + "epoch": 7.92267706302794, + "grad_norm": 1.4574708938598633, + "learning_rate": 4.805247550143646e-05, + "loss": 0.4049314880371094, + "mean_token_accuracy": 0.9693469110131264, + "num_tokens": 7482431.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.2104659411483086, + "eval_loss": 0.9939886927604675, + "eval_mean_token_accuracy": 0.8444042455118436, + "eval_num_tokens": 7558256.0, + "eval_runtime": 90.3118, + "eval_samples_per_second": 18.348, + "eval_steps_per_second": 2.303, + "step": 3080 + }, + { + "entropy": 0.10608276399086468, + "epoch": 8.05198180636777, + "grad_norm": 1.4720594882965088, + "learning_rate": 4.246768157264548e-05, + "loss": 0.3582034683227539, + "mean_token_accuracy": 0.9727947966537284, + "num_tokens": 7607307.0, + "step": 3100 + }, + { + "entropy": 0.09448420397937297, + "epoch": 8.1819363222872, + "grad_norm": 1.006718397140503, + "learning_rate": 3.718693544524604e-05, + "loss": 0.3269464874267578, + "mean_token_accuracy": 0.976178829073906, + "num_tokens": 7727840.0, + "step": 3150 + }, + { + "entropy": 0.09565175730735063, + "epoch": 8.311890838206628, + "grad_norm": 1.0114370584487915, + "learning_rate": 3.222108773007395e-05, + "loss": 0.330229606628418, + "mean_token_accuracy": 0.974904423058033, + "num_tokens": 7848468.0, + "step": 3200 + }, + { + "entropy": 0.0942081324569881, + "epoch": 8.441845354126055, + "grad_norm": 0.8164042234420776, + "learning_rate": 2.7580342000587992e-05, + "loss": 0.32308143615722656, + "mean_token_accuracy": 0.9755518987774849, + "num_tokens": 7971324.0, + "step": 3250 + }, + { + "entropy": 0.10181596595793962, + "epoch": 8.571799870045485, + "grad_norm": 1.7562603950500488, + "learning_rate": 2.327423382708144e-05, + "loss": 0.3492561340332031, + "mean_token_accuracy": 0.9740999150276184, + "num_tokens": 8086025.0, + "step": 3300 + }, + { + "entropy": 0.09124232700094581, + "epoch": 8.701754385964913, + "grad_norm": 1.117050051689148, + "learning_rate": 1.9311611183473242e-05, + "loss": 0.31594392776489255, + "mean_token_accuracy": 0.9763092172145843, + "num_tokens": 8213441.0, + "step": 3350 + }, + { + "entropy": 0.0920115345157683, + "epoch": 8.83170890188434, + "grad_norm": 0.9090991616249084, + "learning_rate": 1.5700616266937413e-05, + "loss": 0.32107589721679686, + "mean_token_accuracy": 0.9758272641897201, + "num_tokens": 8340533.0, + "step": 3400 + }, + { + "entropy": 0.09023899069055914, + "epoch": 8.961663417803768, + "grad_norm": 0.7739766240119934, + "learning_rate": 1.244866876772696e-05, + "loss": 0.3140911674499512, + "mean_token_accuracy": 0.9761302083730697, + "num_tokens": 8468264.0, + "step": 3450 + }, + { + "epoch": 9.0, + "eval_entropy": 0.176344332141945, + "eval_loss": 1.1650298833847046, + "eval_mean_token_accuracy": 0.84236626756879, + "eval_num_tokens": 8503038.0, + "eval_runtime": 90.2504, + "eval_samples_per_second": 18.36, + "eval_steps_per_second": 2.305, + "step": 3465 + }, + { + "entropy": 0.08526831743630332, + "epoch": 9.0909681611436, + "grad_norm": 0.5088723301887512, + "learning_rate": 9.562450623568299e-06, + "loss": 0.2858224678039551, + "mean_token_accuracy": 0.9787911883550673, + "num_tokens": 8590639.0, + "step": 3500 + }, + { + "entropy": 0.08262471918947994, + "epoch": 9.220922677063028, + "grad_norm": 0.4305328130722046, + "learning_rate": 7.04789228995284e-06, + "loss": 0.2831817626953125, + "mean_token_accuracy": 0.9781179207563401, + "num_tokens": 8717381.0, + "step": 3550 + }, + { + "entropy": 0.08766877153888344, + "epoch": 9.350877192982455, + "grad_norm": 0.981825053691864, + "learning_rate": 4.910160554535881e-06, + "loss": 0.3028615188598633, + "mean_token_accuracy": 0.9771956187486649, + "num_tokens": 8842405.0, + "step": 3600 + }, + { + "entropy": 0.09164283481426537, + "epoch": 9.480831708901885, + "grad_norm": 0.4387640655040741, + "learning_rate": 3.1536479206821647e-06, + "loss": 0.31055866241455077, + "mean_token_accuracy": 0.9773933503031731, + "num_tokens": 8962503.0, + "step": 3650 + }, + { + "entropy": 0.08803748412057757, + "epoch": 9.610786224821313, + "grad_norm": 0.8736739158630371, + "learning_rate": 1.781963581971202e-06, + "loss": 0.3015053367614746, + "mean_token_accuracy": 0.9770274487137794, + "num_tokens": 9080522.0, + "step": 3700 + }, + { + "entropy": 0.08563575314357877, + "epoch": 9.74074074074074, + "grad_norm": 0.5531356930732727, + "learning_rate": 7.979260062084801e-07, + "loss": 0.2990781593322754, + "mean_token_accuracy": 0.9775880399346352, + "num_tokens": 9202377.0, + "step": 3750 + }, + { + "entropy": 0.08524747521616519, + "epoch": 9.870695256660168, + "grad_norm": 0.6183454990386963, + "learning_rate": 2.0355714417992117e-07, + "loss": 0.29825557708740236, + "mean_token_accuracy": 0.9775007843971253, + "num_tokens": 9323644.0, + "step": 3800 + }, + { + "entropy": 0.08599433758712594, + "epoch": 10.0, + "grad_norm": 0.6046292185783386, + "learning_rate": 7.827504936341819e-11, + "loss": 0.2940228462219238, + "mean_token_accuracy": 0.9778831139880808, + "num_tokens": 9447820.0, + "step": 3850 + }, + { + "epoch": 10.0, + "eval_entropy": 0.1594313821182228, + "eval_loss": 1.271201491355896, + "eval_mean_token_accuracy": 0.84281695032349, + "eval_num_tokens": 9447820.0, + "eval_runtime": 90.3529, + "eval_samples_per_second": 18.339, + "eval_steps_per_second": 2.302, + "step": 3850 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.30836214325416e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/README.md b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd8d6a3d45759ed195574ce08064042ee486ea1 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-4-31B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-4-31B +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/adapter_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6144d96a813fa7fd1ee98cb6160f42880081fc05 --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/adapter_config.json @@ -0,0 +1,40 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-4-31B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.00985279561940916, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": ".*language_model.*\\.(q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)$", + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/tokenizer_config.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf51dc7f4d936ea414099bc3bb7579d17a0184ca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/tokenizer_config.json @@ -0,0 +1,54 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/trainer_state.json b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..faf4d916f1913b8b3fa75e0c525bb7cb83f3caca --- /dev/null +++ b/DBCA_original_Estonian/gemma-4-31B_original_features_structural_train_original_features_structural_test2/checkpoint-770/trainer_state.json @@ -0,0 +1,206 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 770, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.353258643448353, + "epoch": 0.1299545159194282, + "grad_norm": 3.010725975036621, + "learning_rate": 4.8475852375026876e-05, + "loss": 5.475971069335937, + "mean_token_accuracy": 0.7263440760970116, + "num_tokens": 128842.0, + "step": 50 + }, + { + "entropy": 0.649170914888382, + "epoch": 0.2599090318388564, + "grad_norm": 1.9099390506744385, + "learning_rate": 9.794100785974817e-05, + "loss": 2.55168701171875, + "mean_token_accuracy": 0.8364580717682838, + "num_tokens": 255497.0, + "step": 100 + }, + { + "entropy": 0.5930788792669773, + "epoch": 0.3898635477582846, + "grad_norm": 2.1239051818847656, + "learning_rate": 0.0001474061633444695, + "loss": 2.3440716552734373, + "mean_token_accuracy": 0.8452290838956833, + "num_tokens": 372014.0, + "step": 150 + }, + { + "entropy": 0.5564522063732147, + "epoch": 0.5198180636777128, + "grad_norm": 411.71807861328125, + "learning_rate": 0.00019687131882919077, + "loss": 2.2838446044921876, + "mean_token_accuracy": 0.8498487600684166, + "num_tokens": 500623.0, + "step": 200 + }, + { + "entropy": 0.5539529167115689, + "epoch": 0.649772579597141, + "grad_norm": 2.1969902515411377, + "learning_rate": 0.0002463364743139121, + "loss": 2.675394287109375, + "mean_token_accuracy": 0.8430694487690925, + "num_tokens": 616223.0, + "step": 250 + }, + { + "entropy": 0.5719467167556286, + "epoch": 0.7797270955165692, + "grad_norm": 1.98796546459198, + "learning_rate": 0.00029580162979863343, + "loss": 2.2434300231933593, + "mean_token_accuracy": 0.851241897046566, + "num_tokens": 737263.0, + "step": 300 + }, + { + "entropy": 0.5502805083990097, + "epoch": 0.9096816114359974, + "grad_norm": 2.0211398601531982, + "learning_rate": 0.0003452667852833547, + "loss": 2.1729367065429686, + "mean_token_accuracy": 0.8554597494006156, + "num_tokens": 861477.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.5580813550891784, + "eval_loss": 0.5830356478691101, + "eval_mean_token_accuracy": 0.8432669037809739, + "eval_num_tokens": 944782.0, + "eval_runtime": 90.3664, + "eval_samples_per_second": 18.336, + "eval_steps_per_second": 2.302, + "step": 385 + }, + { + "entropy": 0.5498402091725987, + "epoch": 1.0389863547758285, + "grad_norm": 3.8034188747406006, + "learning_rate": 0.000380866355527619, + "loss": 2.113946990966797, + "mean_token_accuracy": 0.8578129452676629, + "num_tokens": 982803.0, + "step": 400 + }, + { + "entropy": 0.5182110907137394, + "epoch": 1.1689408706952567, + "grad_norm": 2.7830824851989746, + "learning_rate": 0.0003805611725593471, + "loss": 1.9833453369140626, + "mean_token_accuracy": 0.8656822636723518, + "num_tokens": 1105926.0, + "step": 450 + }, + { + "entropy": 0.5260789206624031, + "epoch": 1.2988953866146848, + "grad_norm": 1.7993361949920654, + "learning_rate": 0.0003798653399371568, + "loss": 2.006897430419922, + "mean_token_accuracy": 0.8631055191159248, + "num_tokens": 1229857.0, + "step": 500 + }, + { + "entropy": 0.5327546864748001, + "epoch": 1.428849902534113, + "grad_norm": 1.7606678009033203, + "learning_rate": 0.0003787802874228295, + "loss": 2.020283050537109, + "mean_token_accuracy": 0.8638329988718033, + "num_tokens": 1352330.0, + "step": 550 + }, + { + "entropy": 0.5285360223054886, + "epoch": 1.5588044184535412, + "grad_norm": 4.76006555557251, + "learning_rate": 0.00037730824452755275, + "loss": 1.9987391662597656, + "mean_token_accuracy": 0.8644696187973022, + "num_tokens": 1474790.0, + "step": 600 + }, + { + "entropy": 0.5134804363548756, + "epoch": 1.6887589343729694, + "grad_norm": 1.8447264432907104, + "learning_rate": 0.000375452235930833, + "loss": 1.9669386291503905, + "mean_token_accuracy": 0.8659948265552521, + "num_tokens": 1600381.0, + "step": 650 + }, + { + "entropy": 0.5371069309115409, + "epoch": 1.8187134502923976, + "grad_norm": 1.6537392139434814, + "learning_rate": 0.00037321607526553675, + "loss": 2.0411550903320315, + "mean_token_accuracy": 0.8624854254722595, + "num_tokens": 1716827.0, + "step": 700 + }, + { + "entropy": 0.5270501750707627, + "epoch": 1.9486679662118258, + "grad_norm": 2.6990911960601807, + "learning_rate": 0.00037060435728183, + "loss": 2.015792236328125, + "mean_token_accuracy": 0.8631013777852058, + "num_tokens": 1842798.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5477195472384875, + "eval_loss": 0.5585702657699585, + "eval_mean_token_accuracy": 0.8486175815073344, + "eval_num_tokens": 1889564.0, + "eval_runtime": 90.2194, + "eval_samples_per_second": 18.366, + "eval_steps_per_second": 2.305, + "step": 770 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.62787196730174e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b54ab049e4368ea5ea2e07f6a875f1a6cb1c3f7 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1206/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cfebb9762cd2d8ac67e5eef9b6d616a5b757a3cf612a8d39ed6b720241ca0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b54ab049e4368ea5ea2e07f6a875f1a6cb1c3f7 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test1/checkpoint-1608/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cfebb9762cd2d8ac67e5eef9b6d616a5b757a3cf612a8d39ed6b720241ca0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1206/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-1608/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2010/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2412/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-2814/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3216/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-3618/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-402/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-4020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/tokenizer.json b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/training_args.bin b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eee7a9ec754ed817204451a69367f727d840c04f --- /dev/null +++ b/substitutivity_original_Swedish/gemma-4-31B_substitutivity_splits_original_features_train_substitutivity_splits_original_features_test2/checkpoint-804/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0833132db3e8e58dcb98c35433410cfd475aaf758878ea06e17ab35dd6b1d0 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f26c57bce3037e858d3b3ab8fe8e292559981f1 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b2ec3e36c2994910927613f6fec5bccca96b549da535d894191d0b70ba4226 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1401/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9929379db6a5437c041560ad6886220cb9a61259 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609bed351b74917cff8cd285e7a864ab3c1dc6ac14cf3474c876028e4f6cbc5f +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1868/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60292b2ee9f3c039dccfdefddcad2ba22de1ce7a --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52dd41fcb7a6fb221ec9c6d13b127a7cbc8e26507edee1e15d8002c89677c1c0 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2335/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec7a52b4c300b15762c80274d0a4f066caf00402 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4de7af9fc5466ce6baae52013a4d34f43cd4201b65322147f70650611ad268 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2802/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a63d21f78494346a42e6a893154617520cfa6a52 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3acb02feefd31c41c902af179ffdc03ce65389d3645b1a840a73fa2f7e3cc1 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3269/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d001f26d325765f2475bd37121f66244c157a78f --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcebb9ecb9965d6badd6f7f7a006cf7c1ec991dd08f819ca076614d176889afb +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3736/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55d50db0ce10b3769eede5026ab34be95b20f7f5 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b497ea0b91af9cd80fdcb7d3c1b9d2ffa22c599eef0fb5e444ee349bd79cc2c +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4203/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce8720746ca952ed92269e3c1c46ad6c6a9a62c1 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98478eec5176d5ba9874beed36b69a28a266e894827f99ad8daa8d320ab4f0ef +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-467/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bc32f6dca742290e49cf6783d5fcecc0f0e01ca --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f04dae9972a3cbb2d58fbdb6f368420c7a13bd0ec108adf927c9b35be1c59fe +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4670/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9611270c0fcf189a5bc4ee19cc7d21a77a217daf --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87579718c5232054c711887699d379334a9bfd34621e421cb51666d6fe84aead +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..981d68637099fed07f992fffaccdb9ed741a4e85 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-934/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a629ddad761c792c89fff60d0aece7faf911d65389e62cbba072d460695f930d +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25de9ba6b42c91d786243867f84bbac27f8c3c3f --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa02b138e514cb40ad237763d1eb697f15de7c7e8737ea7350a727c31dd31804 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1401/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb8b9cc04c7e68ab1571f8f48d57f9feb6c15b1e --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea090144ad02e77b7668b7b6b628cd69e34bab753fdedefe12789f43fc471fc6 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1868/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b3f2edc7340016c99d0a64db505a4c0f4e66a73 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57432290fec27c0a4ab3024281e5d1b031e7556c020bae1ac806c021222d3e4 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2335/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7624edca7bd2dbd61c4300b5927dd6dd2876f89a --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a748c1ad408d3a16750afd7b0f7b04fb0b61f81508e81059ec4e27a3c0938d57 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2802/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3da1fa3d471d8da228cf0f1aed82e6af44030954 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56241e0da6d5d635a5f0c9836a88a732f80756512b8b246008454def621f7ea +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3269/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f45cd5b09a9e5d6390d1a762f12a9b1a19079fe9 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eba2860c01f0459dd2a60efede1cbb49561f048b1789570ad43d284d704a977 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3736/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d135b53972f8aac46c2ac8e3cbb4bbae296834be --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b76bffc18b13103f1fd59b4d77dd8605779beb834ffadd67e41109ebf46dc9 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4203/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..346f54780d2fe3a77bac63028492de7f8a944a11 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246753ee0ef4a519a19f949af29927fdcb421b6e8c85452153ce00f0231519a2 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-467/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28647633d302119b04ef34988006e1d50b811886 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef8f70724109cca0e957a9df3dcfdd417d64b75b173579283092076f6bf7e9df +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4670/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/adapter_model.safetensors b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34ac3402b43135046f877178c1a21efe28f1f9d5 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e1554b18c3515d087ba165e9f2883dbc90639a88816d341889d198828288a0 +size 979558760 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/tokenizer.json b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/training_args.bin b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d398e8b54bbb34fd6c12d259e1bff2f532e11979 --- /dev/null +++ b/systematicity_original_Estonian/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-934/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5422a9577f7a2bd2b862d6265b70ae5e1f4c766a2b0b45ae6b9298ce6eba62 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4077a069f4e0db195c477810ba99066327f85850 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ddbdd7ec7a3de6c702033bbcd95d77800173cb71573e1190938c7b3f07102e +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1314/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a98be784d1b7a378214ede3c63697bb091f29bb8 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d5f933a73fb2a2503e74342ec8667e06fb2dc567c5996eaa601a2b9e7299c4 +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-1752/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aae88cdf7fd1708c25c8a83723b949a54671eb0f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fffdaafff817eb2568b07d0816bd37e8d8c86ee7618a6b9ee70f42b19d814507 +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58d90c75766ac990814cedb4fc82626962c7dd31 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695247a37031d84bd2d8169f1675452e017443b14add4a6c47ee16820b1c22ec +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-2628/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8216f985a6ecb63b3ad9cc5a37b251a7ecc0970 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be06d2733eaff8b4ba56106128a72643edded45c74424faba0d565029de1bbda +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3066/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d141b0803b8921be7fb275e26c331d85d8cce8e --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a045c6390f85484d8fc1f1489860d4cd2ca222ce98ab037b02864c25cc4f27 +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3504/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11034c740fb7b42860ca967e0d6187f393927cc1 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbdc430c398a3cd971f9ba493390996277dc6bf7725cf111cb85282bc6ae44f +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-3942/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f240af616c76ee7229233a4619e2ddb8404ddaca --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587ab8a5a6724969f09fb2a2d5f8cd947889810d6f2f638233fc1afda169d600 +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-438/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5197287a8985673c6323d7120f40c7db2f44932b --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ead53d4562e2826f9b0818045cfd3008153f4aa9c26e92f70e68fc76353768 +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-4380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fa2590b49c905e7a57b4c9e873c2dce09013e32 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bb0eb90d5da5a96989b360f36b429d95eec38d6e6a282e434260d2849dfab8 +size 489840816 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a21954a7bb8b1e5cf525201350f79fde9e84c408 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test1/checkpoint-876/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ae3bc2fd16b1f71b1392a195ce2428736c32e8d2b16d0bbcf84415b1c48d96 +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..295d1dde49ff0f7c76dd778f86fb901bbacc3a17 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c50311e552bd76c66dae162545370ab0a861a17819efad1461eb59ec77c125a +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1314/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11dd4480a20d35a1513b52163f0986037457b09c --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56c4fff2962d0c91cbf7e517664b2219d9c28e4b53ae6b1bf44676b6a1dd004 +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-1752/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f37c2ec57575111e98a7f84ddc7d31938ab18633 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c346eb65ec5c533a50dfcefa9f6eaecb159ef2c9bb9e72abf698dfd26c44bdb5 +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71b67ccb319431e140db7348210f728dbf646fe8 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec30629607c0e43035b88610ea5966f1b40caef6b61e93d308e8598f44ed9170 +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-2628/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f72d5c1aed3f6b548e75d8a8103d505833cd749a --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcca6394cb7561430b2ccf444c4181c63e7771f7a81b48d9fc77f0f101f5c020 +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3066/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a29f7513d934e38a57523105483097e5d867185 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d654b7f2b426eca85d46fd9e427e382d0ad8a3a1b13068d2fa83060a42774e +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3504/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3c30bfd2f249e6986cd2a94f563a72099b54279 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6ae9aaf8fa308dc2999718427d3433eacd75aa3729ecce4b77b9022d583595a +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-3942/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cc616e36da8b304bf72b95ffd30a60faba984dd --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e12c154201c6d595e5444d3a252e1d04bed0bd3de69f7c5acd3b6bb08b4cc4 +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-438/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3704b3d0d6ef3e166de2de3394a32a3bc5e56f78 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ae2157567a14645980dc437730543249b5593adf9c722da0adeadb833b977f +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-4380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/adapter_model.safetensors b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f67678224d23650d61c9f623cea40bb85ede0e4a --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58aa51277486093c3cc4515b76da6cda5ffa4c705e4c854fcb4e75ddfce5e328 +size 3917867504 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/tokenizer.json b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..890ddaefca8201dd023c4f013db5ce492d12e7e0 --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6 +size 32170070 diff --git a/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/training_args.bin b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a2494bdaabc0fd0c609e2a3225bef5c5d45752f --- /dev/null +++ b/systematicity_original_Swedish/gemma-4-31B_systematicity_splits_original_features_train_systematicity_splits_original_features_test2/checkpoint-876/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addec48fe685139d34c73b592562116b935ccac7e4c28714c8c76fb7056980af +size 6033