diff --git a/.gitattributes b/.gitattributes index 450694353054abb5890e37434207b87c07eeb1fc..98f089b0a71de7b60459d8b4c8077bff928be7bc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -473,3 +473,292 @@ overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_orig overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1520/tokenizer.json filter=lfs diff=lfs merge=lfs -text overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1540/tokenizer.json filter=lfs diff=lfs merge=lfs -text overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/tokenizer.json filter=lfs diff=lfs merge=lfs -text +overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..02155913d0c922d0910fa39aac917f713e1f721a --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/README.md @@ -0,0 +1,58 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: transformers +model_name: Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1 +tags: +- generated_from_trainer +- sft +- trl +licence: license +--- + +# Model Card for Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1 + +This model is a fine-tuned version of [Qwen/Qwen3-4B-Base](https://huggingface.co/Qwen/Qwen3-4B-Base). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/d2qeywxf) + + + +This model was trained with SFT. + +### Framework versions + +- TRL: 0.29.0 +- Transformers: 5.5.4 +- Pytorch: 2.10.0 +- Datasets: 4.6.1 +- Tokenizers: 0.22.2 + +## Citations + + + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fdeb9f7446bdd99fb586f941cbee1e8172107111 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1155/trainer_state.json @@ -0,0 +1,297 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1155, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.2479030179977415, + "epoch": 0.1299545159194282, + "grad_norm": 1.519571304321289, + "learning_rate": 3.522207847653314e-05, + "loss": 2.093206329345703, + "mean_token_accuracy": 0.6068353663384914, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.932415626347065, + "epoch": 0.2599090318388564, + "grad_norm": 1.180830955505371, + "learning_rate": 7.11629748811588e-05, + "loss": 0.8930854797363281, + "mean_token_accuracy": 0.7708445385098457, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.7730373838543891, + "epoch": 0.3898635477582846, + "grad_norm": 0.7839977145195007, + "learning_rate": 0.00010710387128578447, + "loss": 0.7302116394042969, + "mean_token_accuracy": 0.8012136635184288, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6934178560972214, + "epoch": 0.5198180636777128, + "grad_norm": 0.666778564453125, + "learning_rate": 0.0001430447676904101, + "loss": 0.6505754852294922, + "mean_token_accuracy": 0.8195212116837501, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6900296103954315, + "epoch": 0.649772579597141, + "grad_norm": 0.6762415766716003, + "learning_rate": 0.00017898566409503577, + "loss": 0.6378536987304687, + "mean_token_accuracy": 0.8223087686300278, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.667421719878912, + "epoch": 0.7797270955165692, + "grad_norm": 0.5047685503959656, + "learning_rate": 0.00021492656049966144, + "loss": 0.6148524856567383, + "mean_token_accuracy": 0.8280292323231697, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6388977643847465, + "epoch": 0.9096816114359974, + "grad_norm": 0.4360353350639343, + "learning_rate": 0.0002508674569042871, + "loss": 0.5933729553222656, + "mean_token_accuracy": 0.8329134130477905, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6612381511009656, + "eval_loss": 0.6559221744537354, + "eval_mean_token_accuracy": 0.8195324820967821, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.4007, + "eval_samples_per_second": 31.03, + "eval_steps_per_second": 3.895, + "step": 385 + }, + { + "entropy": 0.6224366770916848, + "epoch": 1.0389863547758285, + "grad_norm": 0.5294668078422546, + "learning_rate": 0.00027673375518355765, + "loss": 0.5677951431274414, + "mean_token_accuracy": 0.8380465067211708, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5827466724812984, + "epoch": 1.1689408706952567, + "grad_norm": 0.5172416567802429, + "learning_rate": 0.0002765120122346144, + "loss": 0.5423126983642578, + "mean_token_accuracy": 0.8467991036176682, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5831517253816128, + "epoch": 1.2988953866146848, + "grad_norm": 0.41916292905807495, + "learning_rate": 0.0002760064270819138, + "loss": 0.534448013305664, + "mean_token_accuracy": 0.8456632816791534, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5869986982643605, + "epoch": 1.428849902534113, + "grad_norm": 0.4387759566307068, + "learning_rate": 0.00027521803857633113, + "loss": 0.5367491912841796, + "mean_token_accuracy": 0.8462416216731071, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5771756853163242, + "epoch": 1.5588044184535412, + "grad_norm": 0.49079665541648865, + "learning_rate": 0.00027414846665880935, + "loss": 0.5238623809814453, + "mean_token_accuracy": 0.84760089635849, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.5549105909466744, + "epoch": 1.6887589343729694, + "grad_norm": 0.4000363051891327, + "learning_rate": 0.0002727999090317863, + "loss": 0.510434226989746, + "mean_token_accuracy": 0.8517858856916427, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.583413660377264, + "epoch": 1.8187134502923976, + "grad_norm": 0.33592426776885986, + "learning_rate": 0.00027117513664346674, + "loss": 0.5297993850708008, + "mean_token_accuracy": 0.846615691781044, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5732646904885769, + "epoch": 1.9486679662118258, + "grad_norm": 0.5528839230537415, + "learning_rate": 0.00026927748799421714, + "loss": 0.5219194793701172, + "mean_token_accuracy": 0.8489033079147339, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6027900550801021, + "eval_loss": 0.5946928858757019, + "eval_mean_token_accuracy": 0.8318195798649237, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.3837, + "eval_samples_per_second": 31.039, + "eval_steps_per_second": 3.896, + "step": 770 + }, + { + "entropy": 0.5329899657611272, + "epoch": 2.077972709551657, + "grad_norm": 0.45793575048446655, + "learning_rate": 0.0002671108622767842, + "loss": 0.48420516967773436, + "mean_token_accuracy": 0.8578200301333289, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5142687204480171, + "epoch": 2.207927225471085, + "grad_norm": 0.4690960645675659, + "learning_rate": 0.0002646797113644295, + "loss": 0.4593114471435547, + "mean_token_accuracy": 0.8622670090198516, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5135884112119675, + "epoch": 2.3378817413905133, + "grad_norm": 0.3752821683883667, + "learning_rate": 0.00026198903066344565, + "loss": 0.4626216125488281, + "mean_token_accuracy": 0.8612511262297631, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5137367483973503, + "epoch": 2.4678362573099415, + "grad_norm": 0.3726271390914917, + "learning_rate": 0.0002590443488488465, + "loss": 0.4601683807373047, + "mean_token_accuracy": 0.8620512077212333, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5105714881420136, + "epoch": 2.5977907732293697, + "grad_norm": 0.41296717524528503, + "learning_rate": 0.00025585171650432525, + "loss": 0.46279102325439453, + "mean_token_accuracy": 0.8611763519048691, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5169161760807037, + "epoch": 2.727745289148798, + "grad_norm": 0.4614253044128418, + "learning_rate": 0.0002524176936898197, + "loss": 0.45492774963378907, + "mean_token_accuracy": 0.8627680170536042, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.4989277676492929, + "epoch": 2.857699805068226, + "grad_norm": 0.37512704730033875, + "learning_rate": 0.00024874933646223225, + "loss": 0.4531984329223633, + "mean_token_accuracy": 0.8637665447592735, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5177617704868317, + "epoch": 2.9876543209876543, + "grad_norm": 0.3700532019138336, + "learning_rate": 0.00024485418237699976, + "loss": 0.45844474792480466, + "mean_token_accuracy": 0.8626988258957863, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5253989950108987, + "eval_loss": 0.5857328176498413, + "eval_mean_token_accuracy": 0.8360884573597175, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.341, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.899, + "step": 1155 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4826229545264538e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..79de678f9354c2e8dc9486518a6d4be189333e1c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1540/trainer_state.json @@ -0,0 +1,378 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1540, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.2479030179977415, + "epoch": 0.1299545159194282, + "grad_norm": 1.519571304321289, + "learning_rate": 3.522207847653314e-05, + "loss": 2.093206329345703, + "mean_token_accuracy": 0.6068353663384914, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.932415626347065, + "epoch": 0.2599090318388564, + "grad_norm": 1.180830955505371, + "learning_rate": 7.11629748811588e-05, + "loss": 0.8930854797363281, + "mean_token_accuracy": 0.7708445385098457, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.7730373838543891, + "epoch": 0.3898635477582846, + "grad_norm": 0.7839977145195007, + "learning_rate": 0.00010710387128578447, + "loss": 0.7302116394042969, + "mean_token_accuracy": 0.8012136635184288, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6934178560972214, + "epoch": 0.5198180636777128, + "grad_norm": 0.666778564453125, + "learning_rate": 0.0001430447676904101, + "loss": 0.6505754852294922, + "mean_token_accuracy": 0.8195212116837501, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6900296103954315, + "epoch": 0.649772579597141, + "grad_norm": 0.6762415766716003, + "learning_rate": 0.00017898566409503577, + "loss": 0.6378536987304687, + "mean_token_accuracy": 0.8223087686300278, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.667421719878912, + "epoch": 0.7797270955165692, + "grad_norm": 0.5047685503959656, + "learning_rate": 0.00021492656049966144, + "loss": 0.6148524856567383, + "mean_token_accuracy": 0.8280292323231697, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6388977643847465, + "epoch": 0.9096816114359974, + "grad_norm": 0.4360353350639343, + "learning_rate": 0.0002508674569042871, + "loss": 0.5933729553222656, + "mean_token_accuracy": 0.8329134130477905, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6612381511009656, + "eval_loss": 0.6559221744537354, + "eval_mean_token_accuracy": 0.8195324820967821, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.4007, + "eval_samples_per_second": 31.03, + "eval_steps_per_second": 3.895, + "step": 385 + }, + { + "entropy": 0.6224366770916848, + "epoch": 1.0389863547758285, + "grad_norm": 0.5294668078422546, + "learning_rate": 0.00027673375518355765, + "loss": 0.5677951431274414, + "mean_token_accuracy": 0.8380465067211708, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5827466724812984, + "epoch": 1.1689408706952567, + "grad_norm": 0.5172416567802429, + "learning_rate": 0.0002765120122346144, + "loss": 0.5423126983642578, + "mean_token_accuracy": 0.8467991036176682, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5831517253816128, + "epoch": 1.2988953866146848, + "grad_norm": 0.41916292905807495, + "learning_rate": 0.0002760064270819138, + "loss": 0.534448013305664, + "mean_token_accuracy": 0.8456632816791534, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5869986982643605, + "epoch": 1.428849902534113, + "grad_norm": 0.4387759566307068, + "learning_rate": 0.00027521803857633113, + "loss": 0.5367491912841796, + "mean_token_accuracy": 0.8462416216731071, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5771756853163242, + "epoch": 1.5588044184535412, + "grad_norm": 0.49079665541648865, + "learning_rate": 0.00027414846665880935, + "loss": 0.5238623809814453, + "mean_token_accuracy": 0.84760089635849, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.5549105909466744, + "epoch": 1.6887589343729694, + "grad_norm": 0.4000363051891327, + "learning_rate": 0.0002727999090317863, + "loss": 0.510434226989746, + "mean_token_accuracy": 0.8517858856916427, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.583413660377264, + "epoch": 1.8187134502923976, + "grad_norm": 0.33592426776885986, + "learning_rate": 0.00027117513664346674, + "loss": 0.5297993850708008, + "mean_token_accuracy": 0.846615691781044, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5732646904885769, + "epoch": 1.9486679662118258, + "grad_norm": 0.5528839230537415, + "learning_rate": 0.00026927748799421714, + "loss": 0.5219194793701172, + "mean_token_accuracy": 0.8489033079147339, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6027900550801021, + "eval_loss": 0.5946928858757019, + "eval_mean_token_accuracy": 0.8318195798649237, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.3837, + "eval_samples_per_second": 31.039, + "eval_steps_per_second": 3.896, + "step": 770 + }, + { + "entropy": 0.5329899657611272, + "epoch": 2.077972709551657, + "grad_norm": 0.45793575048446655, + "learning_rate": 0.0002671108622767842, + "loss": 0.48420516967773436, + "mean_token_accuracy": 0.8578200301333289, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5142687204480171, + "epoch": 2.207927225471085, + "grad_norm": 0.4690960645675659, + "learning_rate": 0.0002646797113644295, + "loss": 0.4593114471435547, + "mean_token_accuracy": 0.8622670090198516, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5135884112119675, + "epoch": 2.3378817413905133, + "grad_norm": 0.3752821683883667, + "learning_rate": 0.00026198903066344565, + "loss": 0.4626216125488281, + "mean_token_accuracy": 0.8612511262297631, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5137367483973503, + "epoch": 2.4678362573099415, + "grad_norm": 0.3726271390914917, + "learning_rate": 0.0002590443488488465, + "loss": 0.4601683807373047, + "mean_token_accuracy": 0.8620512077212333, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5105714881420136, + "epoch": 2.5977907732293697, + "grad_norm": 0.41296717524528503, + "learning_rate": 0.00025585171650432525, + "loss": 0.46279102325439453, + "mean_token_accuracy": 0.8611763519048691, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5169161760807037, + "epoch": 2.727745289148798, + "grad_norm": 0.4614253044128418, + "learning_rate": 0.0002524176936898197, + "loss": 0.45492774963378907, + "mean_token_accuracy": 0.8627680170536042, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.4989277676492929, + "epoch": 2.857699805068226, + "grad_norm": 0.37512704730033875, + "learning_rate": 0.00024874933646223225, + "loss": 0.4531984329223633, + "mean_token_accuracy": 0.8637665447592735, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5177617704868317, + "epoch": 2.9876543209876543, + "grad_norm": 0.3700532019138336, + "learning_rate": 0.00024485418237699976, + "loss": 0.45844474792480466, + "mean_token_accuracy": 0.8626988258957863, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5253989950108987, + "eval_loss": 0.5857328176498413, + "eval_mean_token_accuracy": 0.8360884573597175, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.341, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.899, + "step": 1155 + }, + { + "entropy": 0.4535361140517134, + "epoch": 3.116959064327485, + "grad_norm": 0.3412795662879944, + "learning_rate": 0.00024074023500030492, + "loss": 0.3942829132080078, + "mean_token_accuracy": 0.8781378038564519, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.44747897461056707, + "epoch": 3.246913580246914, + "grad_norm": 0.46647050976753235, + "learning_rate": 0.0002364159474637521, + "loss": 0.38986759185791015, + "mean_token_accuracy": 0.8777281475067139, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4480265176296234, + "epoch": 3.3768680961663415, + "grad_norm": 0.4068582355976105, + "learning_rate": 0.00023189020509529866, + "loss": 0.39444759368896487, + "mean_token_accuracy": 0.8774515727162361, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.45180007234215736, + "epoch": 3.50682261208577, + "grad_norm": 0.4249928593635559, + "learning_rate": 0.00022717230716213122, + "loss": 0.3977077102661133, + "mean_token_accuracy": 0.8762744688987731, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.4614932192862034, + "epoch": 3.636777128005198, + "grad_norm": 0.561008095741272, + "learning_rate": 0.00022227194776300045, + "loss": 0.4022808456420898, + "mean_token_accuracy": 0.8760285252332687, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4414680179953575, + "epoch": 3.7667316439246266, + "grad_norm": 0.38943538069725037, + "learning_rate": 0.00021719919590927584, + "loss": 0.38586376190185545, + "mean_token_accuracy": 0.8783121705055237, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45685607343912127, + "epoch": 3.8966861598440543, + "grad_norm": 0.5362406969070435, + "learning_rate": 0.00021196447483564875, + "loss": 0.3983576583862305, + "mean_token_accuracy": 0.8764419692754746, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49981127894268584, + "eval_loss": 0.5997208952903748, + "eval_mean_token_accuracy": 0.8368159819107789, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.4304, + "eval_samples_per_second": 31.012, + "eval_steps_per_second": 3.893, + "step": 1540 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.97414227488298e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1a6531b73680ac3be3126ebda58dd01e91d1adb4 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1925/trainer_state.json @@ -0,0 +1,469 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1925, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.2479030179977415, + "epoch": 0.1299545159194282, + "grad_norm": 1.519571304321289, + "learning_rate": 3.522207847653314e-05, + "loss": 2.093206329345703, + "mean_token_accuracy": 0.6068353663384914, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.932415626347065, + "epoch": 0.2599090318388564, + "grad_norm": 1.180830955505371, + "learning_rate": 7.11629748811588e-05, + "loss": 0.8930854797363281, + "mean_token_accuracy": 0.7708445385098457, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.7730373838543891, + "epoch": 0.3898635477582846, + "grad_norm": 0.7839977145195007, + "learning_rate": 0.00010710387128578447, + "loss": 0.7302116394042969, + "mean_token_accuracy": 0.8012136635184288, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6934178560972214, + "epoch": 0.5198180636777128, + "grad_norm": 0.666778564453125, + "learning_rate": 0.0001430447676904101, + "loss": 0.6505754852294922, + "mean_token_accuracy": 0.8195212116837501, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6900296103954315, + "epoch": 0.649772579597141, + "grad_norm": 0.6762415766716003, + "learning_rate": 0.00017898566409503577, + "loss": 0.6378536987304687, + "mean_token_accuracy": 0.8223087686300278, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.667421719878912, + "epoch": 0.7797270955165692, + "grad_norm": 0.5047685503959656, + "learning_rate": 0.00021492656049966144, + "loss": 0.6148524856567383, + "mean_token_accuracy": 0.8280292323231697, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6388977643847465, + "epoch": 0.9096816114359974, + "grad_norm": 0.4360353350639343, + "learning_rate": 0.0002508674569042871, + "loss": 0.5933729553222656, + "mean_token_accuracy": 0.8329134130477905, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6612381511009656, + "eval_loss": 0.6559221744537354, + "eval_mean_token_accuracy": 0.8195324820967821, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.4007, + "eval_samples_per_second": 31.03, + "eval_steps_per_second": 3.895, + "step": 385 + }, + { + "entropy": 0.6224366770916848, + "epoch": 1.0389863547758285, + "grad_norm": 0.5294668078422546, + "learning_rate": 0.00027673375518355765, + "loss": 0.5677951431274414, + "mean_token_accuracy": 0.8380465067211708, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5827466724812984, + "epoch": 1.1689408706952567, + "grad_norm": 0.5172416567802429, + "learning_rate": 0.0002765120122346144, + "loss": 0.5423126983642578, + "mean_token_accuracy": 0.8467991036176682, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5831517253816128, + "epoch": 1.2988953866146848, + "grad_norm": 0.41916292905807495, + "learning_rate": 0.0002760064270819138, + "loss": 0.534448013305664, + "mean_token_accuracy": 0.8456632816791534, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5869986982643605, + "epoch": 1.428849902534113, + "grad_norm": 0.4387759566307068, + "learning_rate": 0.00027521803857633113, + "loss": 0.5367491912841796, + "mean_token_accuracy": 0.8462416216731071, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5771756853163242, + "epoch": 1.5588044184535412, + "grad_norm": 0.49079665541648865, + "learning_rate": 0.00027414846665880935, + "loss": 0.5238623809814453, + "mean_token_accuracy": 0.84760089635849, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.5549105909466744, + "epoch": 1.6887589343729694, + "grad_norm": 0.4000363051891327, + "learning_rate": 0.0002727999090317863, + "loss": 0.510434226989746, + "mean_token_accuracy": 0.8517858856916427, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.583413660377264, + "epoch": 1.8187134502923976, + "grad_norm": 0.33592426776885986, + "learning_rate": 0.00027117513664346674, + "loss": 0.5297993850708008, + "mean_token_accuracy": 0.846615691781044, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5732646904885769, + "epoch": 1.9486679662118258, + "grad_norm": 0.5528839230537415, + "learning_rate": 0.00026927748799421714, + "loss": 0.5219194793701172, + "mean_token_accuracy": 0.8489033079147339, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6027900550801021, + "eval_loss": 0.5946928858757019, + "eval_mean_token_accuracy": 0.8318195798649237, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.3837, + "eval_samples_per_second": 31.039, + "eval_steps_per_second": 3.896, + "step": 770 + }, + { + "entropy": 0.5329899657611272, + "epoch": 2.077972709551657, + "grad_norm": 0.45793575048446655, + "learning_rate": 0.0002671108622767842, + "loss": 0.48420516967773436, + "mean_token_accuracy": 0.8578200301333289, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5142687204480171, + "epoch": 2.207927225471085, + "grad_norm": 0.4690960645675659, + "learning_rate": 0.0002646797113644295, + "loss": 0.4593114471435547, + "mean_token_accuracy": 0.8622670090198516, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5135884112119675, + "epoch": 2.3378817413905133, + "grad_norm": 0.3752821683883667, + "learning_rate": 0.00026198903066344565, + "loss": 0.4626216125488281, + "mean_token_accuracy": 0.8612511262297631, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5137367483973503, + "epoch": 2.4678362573099415, + "grad_norm": 0.3726271390914917, + "learning_rate": 0.0002590443488488465, + "loss": 0.4601683807373047, + "mean_token_accuracy": 0.8620512077212333, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5105714881420136, + "epoch": 2.5977907732293697, + "grad_norm": 0.41296717524528503, + "learning_rate": 0.00025585171650432525, + "loss": 0.46279102325439453, + "mean_token_accuracy": 0.8611763519048691, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5169161760807037, + "epoch": 2.727745289148798, + "grad_norm": 0.4614253044128418, + "learning_rate": 0.0002524176936898197, + "loss": 0.45492774963378907, + "mean_token_accuracy": 0.8627680170536042, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.4989277676492929, + "epoch": 2.857699805068226, + "grad_norm": 0.37512704730033875, + "learning_rate": 0.00024874933646223225, + "loss": 0.4531984329223633, + "mean_token_accuracy": 0.8637665447592735, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5177617704868317, + "epoch": 2.9876543209876543, + "grad_norm": 0.3700532019138336, + "learning_rate": 0.00024485418237699976, + "loss": 0.45844474792480466, + "mean_token_accuracy": 0.8626988258957863, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5253989950108987, + "eval_loss": 0.5857328176498413, + "eval_mean_token_accuracy": 0.8360884573597175, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.341, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.899, + "step": 1155 + }, + { + "entropy": 0.4535361140517134, + "epoch": 3.116959064327485, + "grad_norm": 0.3412795662879944, + "learning_rate": 0.00024074023500030492, + "loss": 0.3942829132080078, + "mean_token_accuracy": 0.8781378038564519, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.44747897461056707, + "epoch": 3.246913580246914, + "grad_norm": 0.46647050976753235, + "learning_rate": 0.0002364159474637521, + "loss": 0.38986759185791015, + "mean_token_accuracy": 0.8777281475067139, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4480265176296234, + "epoch": 3.3768680961663415, + "grad_norm": 0.4068582355976105, + "learning_rate": 0.00023189020509529866, + "loss": 0.39444759368896487, + "mean_token_accuracy": 0.8774515727162361, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.45180007234215736, + "epoch": 3.50682261208577, + "grad_norm": 0.4249928593635559, + "learning_rate": 0.00022717230716213122, + "loss": 0.3977077102661133, + "mean_token_accuracy": 0.8762744688987731, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.4614932192862034, + "epoch": 3.636777128005198, + "grad_norm": 0.561008095741272, + "learning_rate": 0.00022227194776300045, + "loss": 0.4022808456420898, + "mean_token_accuracy": 0.8760285252332687, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4414680179953575, + "epoch": 3.7667316439246266, + "grad_norm": 0.38943538069725037, + "learning_rate": 0.00021719919590927584, + "loss": 0.38586376190185545, + "mean_token_accuracy": 0.8783121705055237, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45685607343912127, + "epoch": 3.8966861598440543, + "grad_norm": 0.5362406969070435, + "learning_rate": 0.00021196447483564875, + "loss": 0.3983576583862305, + "mean_token_accuracy": 0.8764419692754746, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49981127894268584, + "eval_loss": 0.5997208952903748, + "eval_mean_token_accuracy": 0.8368159819107789, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.4304, + "eval_samples_per_second": 31.012, + "eval_steps_per_second": 3.893, + "step": 1540 + }, + { + "entropy": 0.4439909208060509, + "epoch": 4.025990903183885, + "grad_norm": 0.5490113496780396, + "learning_rate": 0.00020657854058299564, + "loss": 0.38307292938232423, + "mean_token_accuracy": 0.8795150506436525, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.3837250977009535, + "epoch": 4.155945419103314, + "grad_norm": 0.5567234754562378, + "learning_rate": 0.0002010524598974076, + "loss": 0.3182963752746582, + "mean_token_accuracy": 0.8964017608761787, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.377872094810009, + "epoch": 4.2858999350227425, + "grad_norm": 0.4315710961818695, + "learning_rate": 0.00019539758749079845, + "loss": 0.318333683013916, + "mean_token_accuracy": 0.8963816618919372, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.38739304527640345, + "epoch": 4.41585445094217, + "grad_norm": 0.49140632152557373, + "learning_rate": 0.00018962554270981555, + "loss": 0.32688804626464846, + "mean_token_accuracy": 0.8937860554456711, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.39157475270330905, + "epoch": 4.545808966861598, + "grad_norm": 0.40667369961738586, + "learning_rate": 0.00018374818566099208, + "loss": 0.3305763626098633, + "mean_token_accuracy": 0.8916732975840569, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.3838599680364132, + "epoch": 4.675763482781027, + "grad_norm": 0.4632417857646942, + "learning_rate": 0.0001777775928411983, + "loss": 0.3267818450927734, + "mean_token_accuracy": 0.8946500706672669, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.38270787581801413, + "epoch": 4.805717998700455, + "grad_norm": 0.5529720187187195, + "learning_rate": 0.0001717260323234649, + "loss": 0.3264235305786133, + "mean_token_accuracy": 0.8948800846934318, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.38736109718680384, + "epoch": 4.935672514619883, + "grad_norm": 0.5604785680770874, + "learning_rate": 0.00016560593854916497, + "loss": 0.3280513381958008, + "mean_token_accuracy": 0.8931388029456139, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4370518812479881, + "eval_loss": 0.6210553050041199, + "eval_mean_token_accuracy": 0.8379779781859654, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.4039, + "eval_samples_per_second": 31.028, + "eval_steps_per_second": 3.895, + "step": 1925 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.466249383274455e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eaeb3434bc89730948f44eb9280ea172c6365dd4 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2310/trainer_state.json @@ -0,0 +1,560 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 2310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.2479030179977415, + "epoch": 0.1299545159194282, + "grad_norm": 1.519571304321289, + "learning_rate": 3.522207847653314e-05, + "loss": 2.093206329345703, + "mean_token_accuracy": 0.6068353663384914, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.932415626347065, + "epoch": 0.2599090318388564, + "grad_norm": 1.180830955505371, + "learning_rate": 7.11629748811588e-05, + "loss": 0.8930854797363281, + "mean_token_accuracy": 0.7708445385098457, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.7730373838543891, + "epoch": 0.3898635477582846, + "grad_norm": 0.7839977145195007, + "learning_rate": 0.00010710387128578447, + "loss": 0.7302116394042969, + "mean_token_accuracy": 0.8012136635184288, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6934178560972214, + "epoch": 0.5198180636777128, + "grad_norm": 0.666778564453125, + "learning_rate": 0.0001430447676904101, + "loss": 0.6505754852294922, + "mean_token_accuracy": 0.8195212116837501, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6900296103954315, + "epoch": 0.649772579597141, + "grad_norm": 0.6762415766716003, + "learning_rate": 0.00017898566409503577, + "loss": 0.6378536987304687, + "mean_token_accuracy": 0.8223087686300278, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.667421719878912, + "epoch": 0.7797270955165692, + "grad_norm": 0.5047685503959656, + "learning_rate": 0.00021492656049966144, + "loss": 0.6148524856567383, + "mean_token_accuracy": 0.8280292323231697, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6388977643847465, + "epoch": 0.9096816114359974, + "grad_norm": 0.4360353350639343, + "learning_rate": 0.0002508674569042871, + "loss": 0.5933729553222656, + "mean_token_accuracy": 0.8329134130477905, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6612381511009656, + "eval_loss": 0.6559221744537354, + "eval_mean_token_accuracy": 0.8195324820967821, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.4007, + "eval_samples_per_second": 31.03, + "eval_steps_per_second": 3.895, + "step": 385 + }, + { + "entropy": 0.6224366770916848, + "epoch": 1.0389863547758285, + "grad_norm": 0.5294668078422546, + "learning_rate": 0.00027673375518355765, + "loss": 0.5677951431274414, + "mean_token_accuracy": 0.8380465067211708, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5827466724812984, + "epoch": 1.1689408706952567, + "grad_norm": 0.5172416567802429, + "learning_rate": 0.0002765120122346144, + "loss": 0.5423126983642578, + "mean_token_accuracy": 0.8467991036176682, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5831517253816128, + "epoch": 1.2988953866146848, + "grad_norm": 0.41916292905807495, + "learning_rate": 0.0002760064270819138, + "loss": 0.534448013305664, + "mean_token_accuracy": 0.8456632816791534, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5869986982643605, + "epoch": 1.428849902534113, + "grad_norm": 0.4387759566307068, + "learning_rate": 0.00027521803857633113, + "loss": 0.5367491912841796, + "mean_token_accuracy": 0.8462416216731071, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5771756853163242, + "epoch": 1.5588044184535412, + "grad_norm": 0.49079665541648865, + "learning_rate": 0.00027414846665880935, + "loss": 0.5238623809814453, + "mean_token_accuracy": 0.84760089635849, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.5549105909466744, + "epoch": 1.6887589343729694, + "grad_norm": 0.4000363051891327, + "learning_rate": 0.0002727999090317863, + "loss": 0.510434226989746, + "mean_token_accuracy": 0.8517858856916427, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.583413660377264, + "epoch": 1.8187134502923976, + "grad_norm": 0.33592426776885986, + "learning_rate": 0.00027117513664346674, + "loss": 0.5297993850708008, + "mean_token_accuracy": 0.846615691781044, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5732646904885769, + "epoch": 1.9486679662118258, + "grad_norm": 0.5528839230537415, + "learning_rate": 0.00026927748799421714, + "loss": 0.5219194793701172, + "mean_token_accuracy": 0.8489033079147339, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6027900550801021, + "eval_loss": 0.5946928858757019, + "eval_mean_token_accuracy": 0.8318195798649237, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.3837, + "eval_samples_per_second": 31.039, + "eval_steps_per_second": 3.896, + "step": 770 + }, + { + "entropy": 0.5329899657611272, + "epoch": 2.077972709551657, + "grad_norm": 0.45793575048446655, + "learning_rate": 0.0002671108622767842, + "loss": 0.48420516967773436, + "mean_token_accuracy": 0.8578200301333289, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5142687204480171, + "epoch": 2.207927225471085, + "grad_norm": 0.4690960645675659, + "learning_rate": 0.0002646797113644295, + "loss": 0.4593114471435547, + "mean_token_accuracy": 0.8622670090198516, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5135884112119675, + "epoch": 2.3378817413905133, + "grad_norm": 0.3752821683883667, + "learning_rate": 0.00026198903066344565, + "loss": 0.4626216125488281, + "mean_token_accuracy": 0.8612511262297631, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5137367483973503, + "epoch": 2.4678362573099415, + "grad_norm": 0.3726271390914917, + "learning_rate": 0.0002590443488488465, + "loss": 0.4601683807373047, + "mean_token_accuracy": 0.8620512077212333, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5105714881420136, + "epoch": 2.5977907732293697, + "grad_norm": 0.41296717524528503, + "learning_rate": 0.00025585171650432525, + "loss": 0.46279102325439453, + "mean_token_accuracy": 0.8611763519048691, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5169161760807037, + "epoch": 2.727745289148798, + "grad_norm": 0.4614253044128418, + "learning_rate": 0.0002524176936898197, + "loss": 0.45492774963378907, + "mean_token_accuracy": 0.8627680170536042, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.4989277676492929, + "epoch": 2.857699805068226, + "grad_norm": 0.37512704730033875, + "learning_rate": 0.00024874933646223225, + "loss": 0.4531984329223633, + "mean_token_accuracy": 0.8637665447592735, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5177617704868317, + "epoch": 2.9876543209876543, + "grad_norm": 0.3700532019138336, + "learning_rate": 0.00024485418237699976, + "loss": 0.45844474792480466, + "mean_token_accuracy": 0.8626988258957863, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5253989950108987, + "eval_loss": 0.5857328176498413, + "eval_mean_token_accuracy": 0.8360884573597175, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.341, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.899, + "step": 1155 + }, + { + "entropy": 0.4535361140517134, + "epoch": 3.116959064327485, + "grad_norm": 0.3412795662879944, + "learning_rate": 0.00024074023500030492, + "loss": 0.3942829132080078, + "mean_token_accuracy": 0.8781378038564519, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.44747897461056707, + "epoch": 3.246913580246914, + "grad_norm": 0.46647050976753235, + "learning_rate": 0.0002364159474637521, + "loss": 0.38986759185791015, + "mean_token_accuracy": 0.8777281475067139, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4480265176296234, + "epoch": 3.3768680961663415, + "grad_norm": 0.4068582355976105, + "learning_rate": 0.00023189020509529866, + "loss": 0.39444759368896487, + "mean_token_accuracy": 0.8774515727162361, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.45180007234215736, + "epoch": 3.50682261208577, + "grad_norm": 0.4249928593635559, + "learning_rate": 0.00022717230716213122, + "loss": 0.3977077102661133, + "mean_token_accuracy": 0.8762744688987731, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.4614932192862034, + "epoch": 3.636777128005198, + "grad_norm": 0.561008095741272, + "learning_rate": 0.00022227194776300045, + "loss": 0.4022808456420898, + "mean_token_accuracy": 0.8760285252332687, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4414680179953575, + "epoch": 3.7667316439246266, + "grad_norm": 0.38943538069725037, + "learning_rate": 0.00021719919590927584, + "loss": 0.38586376190185545, + "mean_token_accuracy": 0.8783121705055237, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45685607343912127, + "epoch": 3.8966861598440543, + "grad_norm": 0.5362406969070435, + "learning_rate": 0.00021196447483564875, + "loss": 0.3983576583862305, + "mean_token_accuracy": 0.8764419692754746, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49981127894268584, + "eval_loss": 0.5997208952903748, + "eval_mean_token_accuracy": 0.8368159819107789, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.4304, + "eval_samples_per_second": 31.012, + "eval_steps_per_second": 3.893, + "step": 1540 + }, + { + "entropy": 0.4439909208060509, + "epoch": 4.025990903183885, + "grad_norm": 0.5490113496780396, + "learning_rate": 0.00020657854058299564, + "loss": 0.38307292938232423, + "mean_token_accuracy": 0.8795150506436525, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.3837250977009535, + "epoch": 4.155945419103314, + "grad_norm": 0.5567234754562378, + "learning_rate": 0.0002010524598974076, + "loss": 0.3182963752746582, + "mean_token_accuracy": 0.8964017608761787, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.377872094810009, + "epoch": 4.2858999350227425, + "grad_norm": 0.4315710961818695, + "learning_rate": 0.00019539758749079845, + "loss": 0.318333683013916, + "mean_token_accuracy": 0.8963816618919372, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.38739304527640345, + "epoch": 4.41585445094217, + "grad_norm": 0.49140632152557373, + "learning_rate": 0.00018962554270981555, + "loss": 0.32688804626464846, + "mean_token_accuracy": 0.8937860554456711, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.39157475270330905, + "epoch": 4.545808966861598, + "grad_norm": 0.40667369961738586, + "learning_rate": 0.00018374818566099208, + "loss": 0.3305763626098633, + "mean_token_accuracy": 0.8916732975840569, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.3838599680364132, + "epoch": 4.675763482781027, + "grad_norm": 0.4632417857646942, + "learning_rate": 0.0001777775928411983, + "loss": 0.3267818450927734, + "mean_token_accuracy": 0.8946500706672669, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.38270787581801413, + "epoch": 4.805717998700455, + "grad_norm": 0.5529720187187195, + "learning_rate": 0.0001717260323234649, + "loss": 0.3264235305786133, + "mean_token_accuracy": 0.8948800846934318, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.38736109718680384, + "epoch": 4.935672514619883, + "grad_norm": 0.5604785680770874, + "learning_rate": 0.00016560593854916497, + "loss": 0.3280513381958008, + "mean_token_accuracy": 0.8931388029456139, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4370518812479881, + "eval_loss": 0.6210553050041199, + "eval_mean_token_accuracy": 0.8379779781859654, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.4039, + "eval_samples_per_second": 31.028, + "eval_steps_per_second": 3.895, + "step": 1925 + }, + { + "entropy": 0.3330705868988181, + "epoch": 5.064977257959714, + "grad_norm": 0.5386723875999451, + "learning_rate": 0.0001594298867783512, + "loss": 0.2754818344116211, + "mean_token_accuracy": 0.9101201346771202, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.2980620255321264, + "epoch": 5.1949317738791425, + "grad_norm": 0.5633581876754761, + "learning_rate": 0.00015321056725074549, + "loss": 0.23754241943359375, + "mean_token_accuracy": 0.9203532826900482, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.3111592583358288, + "epoch": 5.32488628979857, + "grad_norm": 0.5031015872955322, + "learning_rate": 0.0001469607591104745, + "loss": 0.24428102493286133, + "mean_token_accuracy": 0.917181601524353, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.31522042460739613, + "epoch": 5.454840805717999, + "grad_norm": 0.6432453393936157, + "learning_rate": 0.0001406933041481286, + "loss": 0.25112478256225584, + "mean_token_accuracy": 0.9152472382783889, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.3046229027956724, + "epoch": 5.584795321637427, + "grad_norm": 0.5104537606239319, + "learning_rate": 0.00013442108041409814, + "loss": 0.2431495475769043, + "mean_token_accuracy": 0.917829519212246, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.30440517760813235, + "epoch": 5.714749837556855, + "grad_norm": 0.5307765603065491, + "learning_rate": 0.0001281569757574053, + "loss": 0.24610313415527343, + "mean_token_accuracy": 0.9166415151953697, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.304359400421381, + "epoch": 5.844704353476283, + "grad_norm": 0.5014523267745972, + "learning_rate": 0.00012191386134440133, + "loss": 0.24548973083496095, + "mean_token_accuracy": 0.9165477308630944, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.3141418205201626, + "epoch": 5.974658869395712, + "grad_norm": 0.567398726940155, + "learning_rate": 0.00011570456521174339, + "loss": 0.24975168228149414, + "mean_token_accuracy": 0.9139353120326996, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3758909202252443, + "eval_loss": 0.6843022108078003, + "eval_mean_token_accuracy": 0.8348075449466705, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.3825, + "eval_samples_per_second": 31.04, + "eval_steps_per_second": 3.896, + "step": 2310 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9583147604912435e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d24576b18858c4492e2ba57560be3246abe492c1 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2695/trainer_state.json @@ -0,0 +1,641 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 2695, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.2479030179977415, + "epoch": 0.1299545159194282, + "grad_norm": 1.519571304321289, + "learning_rate": 3.522207847653314e-05, + "loss": 2.093206329345703, + "mean_token_accuracy": 0.6068353663384914, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.932415626347065, + "epoch": 0.2599090318388564, + "grad_norm": 1.180830955505371, + "learning_rate": 7.11629748811588e-05, + "loss": 0.8930854797363281, + "mean_token_accuracy": 0.7708445385098457, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.7730373838543891, + "epoch": 0.3898635477582846, + "grad_norm": 0.7839977145195007, + "learning_rate": 0.00010710387128578447, + "loss": 0.7302116394042969, + "mean_token_accuracy": 0.8012136635184288, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6934178560972214, + "epoch": 0.5198180636777128, + "grad_norm": 0.666778564453125, + "learning_rate": 0.0001430447676904101, + "loss": 0.6505754852294922, + "mean_token_accuracy": 0.8195212116837501, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6900296103954315, + "epoch": 0.649772579597141, + "grad_norm": 0.6762415766716003, + "learning_rate": 0.00017898566409503577, + "loss": 0.6378536987304687, + "mean_token_accuracy": 0.8223087686300278, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.667421719878912, + "epoch": 0.7797270955165692, + "grad_norm": 0.5047685503959656, + "learning_rate": 0.00021492656049966144, + "loss": 0.6148524856567383, + "mean_token_accuracy": 0.8280292323231697, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6388977643847465, + "epoch": 0.9096816114359974, + "grad_norm": 0.4360353350639343, + "learning_rate": 0.0002508674569042871, + "loss": 0.5933729553222656, + "mean_token_accuracy": 0.8329134130477905, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6612381511009656, + "eval_loss": 0.6559221744537354, + "eval_mean_token_accuracy": 0.8195324820967821, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.4007, + "eval_samples_per_second": 31.03, + "eval_steps_per_second": 3.895, + "step": 385 + }, + { + "entropy": 0.6224366770916848, + "epoch": 1.0389863547758285, + "grad_norm": 0.5294668078422546, + "learning_rate": 0.00027673375518355765, + "loss": 0.5677951431274414, + "mean_token_accuracy": 0.8380465067211708, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5827466724812984, + "epoch": 1.1689408706952567, + "grad_norm": 0.5172416567802429, + "learning_rate": 0.0002765120122346144, + "loss": 0.5423126983642578, + "mean_token_accuracy": 0.8467991036176682, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5831517253816128, + "epoch": 1.2988953866146848, + "grad_norm": 0.41916292905807495, + "learning_rate": 0.0002760064270819138, + "loss": 0.534448013305664, + "mean_token_accuracy": 0.8456632816791534, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5869986982643605, + "epoch": 1.428849902534113, + "grad_norm": 0.4387759566307068, + "learning_rate": 0.00027521803857633113, + "loss": 0.5367491912841796, + "mean_token_accuracy": 0.8462416216731071, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5771756853163242, + "epoch": 1.5588044184535412, + "grad_norm": 0.49079665541648865, + "learning_rate": 0.00027414846665880935, + "loss": 0.5238623809814453, + "mean_token_accuracy": 0.84760089635849, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.5549105909466744, + "epoch": 1.6887589343729694, + "grad_norm": 0.4000363051891327, + "learning_rate": 0.0002727999090317863, + "loss": 0.510434226989746, + "mean_token_accuracy": 0.8517858856916427, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.583413660377264, + "epoch": 1.8187134502923976, + "grad_norm": 0.33592426776885986, + "learning_rate": 0.00027117513664346674, + "loss": 0.5297993850708008, + "mean_token_accuracy": 0.846615691781044, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5732646904885769, + "epoch": 1.9486679662118258, + "grad_norm": 0.5528839230537415, + "learning_rate": 0.00026927748799421714, + "loss": 0.5219194793701172, + "mean_token_accuracy": 0.8489033079147339, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6027900550801021, + "eval_loss": 0.5946928858757019, + "eval_mean_token_accuracy": 0.8318195798649237, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.3837, + "eval_samples_per_second": 31.039, + "eval_steps_per_second": 3.896, + "step": 770 + }, + { + "entropy": 0.5329899657611272, + "epoch": 2.077972709551657, + "grad_norm": 0.45793575048446655, + "learning_rate": 0.0002671108622767842, + "loss": 0.48420516967773436, + "mean_token_accuracy": 0.8578200301333289, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5142687204480171, + "epoch": 2.207927225471085, + "grad_norm": 0.4690960645675659, + "learning_rate": 0.0002646797113644295, + "loss": 0.4593114471435547, + "mean_token_accuracy": 0.8622670090198516, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5135884112119675, + "epoch": 2.3378817413905133, + "grad_norm": 0.3752821683883667, + "learning_rate": 0.00026198903066344565, + "loss": 0.4626216125488281, + "mean_token_accuracy": 0.8612511262297631, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5137367483973503, + "epoch": 2.4678362573099415, + "grad_norm": 0.3726271390914917, + "learning_rate": 0.0002590443488488465, + "loss": 0.4601683807373047, + "mean_token_accuracy": 0.8620512077212333, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5105714881420136, + "epoch": 2.5977907732293697, + "grad_norm": 0.41296717524528503, + "learning_rate": 0.00025585171650432525, + "loss": 0.46279102325439453, + "mean_token_accuracy": 0.8611763519048691, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5169161760807037, + "epoch": 2.727745289148798, + "grad_norm": 0.4614253044128418, + "learning_rate": 0.0002524176936898197, + "loss": 0.45492774963378907, + "mean_token_accuracy": 0.8627680170536042, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.4989277676492929, + "epoch": 2.857699805068226, + "grad_norm": 0.37512704730033875, + "learning_rate": 0.00024874933646223225, + "loss": 0.4531984329223633, + "mean_token_accuracy": 0.8637665447592735, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5177617704868317, + "epoch": 2.9876543209876543, + "grad_norm": 0.3700532019138336, + "learning_rate": 0.00024485418237699976, + "loss": 0.45844474792480466, + "mean_token_accuracy": 0.8626988258957863, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5253989950108987, + "eval_loss": 0.5857328176498413, + "eval_mean_token_accuracy": 0.8360884573597175, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.341, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.899, + "step": 1155 + }, + { + "entropy": 0.4535361140517134, + "epoch": 3.116959064327485, + "grad_norm": 0.3412795662879944, + "learning_rate": 0.00024074023500030492, + "loss": 0.3942829132080078, + "mean_token_accuracy": 0.8781378038564519, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.44747897461056707, + "epoch": 3.246913580246914, + "grad_norm": 0.46647050976753235, + "learning_rate": 0.0002364159474637521, + "loss": 0.38986759185791015, + "mean_token_accuracy": 0.8777281475067139, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4480265176296234, + "epoch": 3.3768680961663415, + "grad_norm": 0.4068582355976105, + "learning_rate": 0.00023189020509529866, + "loss": 0.39444759368896487, + "mean_token_accuracy": 0.8774515727162361, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.45180007234215736, + "epoch": 3.50682261208577, + "grad_norm": 0.4249928593635559, + "learning_rate": 0.00022717230716213122, + "loss": 0.3977077102661133, + "mean_token_accuracy": 0.8762744688987731, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.4614932192862034, + "epoch": 3.636777128005198, + "grad_norm": 0.561008095741272, + "learning_rate": 0.00022227194776300045, + "loss": 0.4022808456420898, + "mean_token_accuracy": 0.8760285252332687, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4414680179953575, + "epoch": 3.7667316439246266, + "grad_norm": 0.38943538069725037, + "learning_rate": 0.00021719919590927584, + "loss": 0.38586376190185545, + "mean_token_accuracy": 0.8783121705055237, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45685607343912127, + "epoch": 3.8966861598440543, + "grad_norm": 0.5362406969070435, + "learning_rate": 0.00021196447483564875, + "loss": 0.3983576583862305, + "mean_token_accuracy": 0.8764419692754746, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49981127894268584, + "eval_loss": 0.5997208952903748, + "eval_mean_token_accuracy": 0.8368159819107789, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.4304, + "eval_samples_per_second": 31.012, + "eval_steps_per_second": 3.893, + "step": 1540 + }, + { + "entropy": 0.4439909208060509, + "epoch": 4.025990903183885, + "grad_norm": 0.5490113496780396, + "learning_rate": 0.00020657854058299564, + "loss": 0.38307292938232423, + "mean_token_accuracy": 0.8795150506436525, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.3837250977009535, + "epoch": 4.155945419103314, + "grad_norm": 0.5567234754562378, + "learning_rate": 0.0002010524598974076, + "loss": 0.3182963752746582, + "mean_token_accuracy": 0.8964017608761787, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.377872094810009, + "epoch": 4.2858999350227425, + "grad_norm": 0.4315710961818695, + "learning_rate": 0.00019539758749079845, + "loss": 0.318333683013916, + "mean_token_accuracy": 0.8963816618919372, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.38739304527640345, + "epoch": 4.41585445094217, + "grad_norm": 0.49140632152557373, + "learning_rate": 0.00018962554270981555, + "loss": 0.32688804626464846, + "mean_token_accuracy": 0.8937860554456711, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.39157475270330905, + "epoch": 4.545808966861598, + "grad_norm": 0.40667369961738586, + "learning_rate": 0.00018374818566099208, + "loss": 0.3305763626098633, + "mean_token_accuracy": 0.8916732975840569, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.3838599680364132, + "epoch": 4.675763482781027, + "grad_norm": 0.4632417857646942, + "learning_rate": 0.0001777775928411983, + "loss": 0.3267818450927734, + "mean_token_accuracy": 0.8946500706672669, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.38270787581801413, + "epoch": 4.805717998700455, + "grad_norm": 0.5529720187187195, + "learning_rate": 0.0001717260323234649, + "loss": 0.3264235305786133, + "mean_token_accuracy": 0.8948800846934318, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.38736109718680384, + "epoch": 4.935672514619883, + "grad_norm": 0.5604785680770874, + "learning_rate": 0.00016560593854916497, + "loss": 0.3280513381958008, + "mean_token_accuracy": 0.8931388029456139, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4370518812479881, + "eval_loss": 0.6210553050041199, + "eval_mean_token_accuracy": 0.8379779781859654, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.4039, + "eval_samples_per_second": 31.028, + "eval_steps_per_second": 3.895, + "step": 1925 + }, + { + "entropy": 0.3330705868988181, + "epoch": 5.064977257959714, + "grad_norm": 0.5386723875999451, + "learning_rate": 0.0001594298867783512, + "loss": 0.2754818344116211, + "mean_token_accuracy": 0.9101201346771202, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.2980620255321264, + "epoch": 5.1949317738791425, + "grad_norm": 0.5633581876754761, + "learning_rate": 0.00015321056725074549, + "loss": 0.23754241943359375, + "mean_token_accuracy": 0.9203532826900482, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.3111592583358288, + "epoch": 5.32488628979857, + "grad_norm": 0.5031015872955322, + "learning_rate": 0.0001469607591104745, + "loss": 0.24428102493286133, + "mean_token_accuracy": 0.917181601524353, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.31522042460739613, + "epoch": 5.454840805717999, + "grad_norm": 0.6432453393936157, + "learning_rate": 0.0001406933041481286, + "loss": 0.25112478256225584, + "mean_token_accuracy": 0.9152472382783889, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.3046229027956724, + "epoch": 5.584795321637427, + "grad_norm": 0.5104537606239319, + "learning_rate": 0.00013442108041409814, + "loss": 0.2431495475769043, + "mean_token_accuracy": 0.917829519212246, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.30440517760813235, + "epoch": 5.714749837556855, + "grad_norm": 0.5307765603065491, + "learning_rate": 0.0001281569757574053, + "loss": 0.24610313415527343, + "mean_token_accuracy": 0.9166415151953697, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.304359400421381, + "epoch": 5.844704353476283, + "grad_norm": 0.5014523267745972, + "learning_rate": 0.00012191386134440133, + "loss": 0.24548973083496095, + "mean_token_accuracy": 0.9165477308630944, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.3141418205201626, + "epoch": 5.974658869395712, + "grad_norm": 0.567398726940155, + "learning_rate": 0.00011570456521174339, + "loss": 0.24975168228149414, + "mean_token_accuracy": 0.9139353120326996, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3758909202252443, + "eval_loss": 0.6843022108078003, + "eval_mean_token_accuracy": 0.8348075449466705, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.3825, + "eval_samples_per_second": 31.04, + "eval_steps_per_second": 3.896, + "step": 2310 + }, + { + "entropy": 0.24346149432000203, + "epoch": 6.1039636127355426, + "grad_norm": 0.7140825986862183, + "learning_rate": 0.00010954184590799172, + "loss": 0.17231273651123047, + "mean_token_accuracy": 0.9407721275660261, + "num_tokens": 6909578.0, + "step": 2350 + }, + { + "entropy": 0.2160973483324051, + "epoch": 6.23391812865497, + "grad_norm": 0.49014952778816223, + "learning_rate": 0.00010343836627798716, + "loss": 0.15455107688903807, + "mean_token_accuracy": 0.9467655989527702, + "num_tokens": 7056244.0, + "step": 2400 + }, + { + "entropy": 0.21491924367845058, + "epoch": 6.363872644574399, + "grad_norm": 0.5529471635818481, + "learning_rate": 9.740666744387656e-05, + "loss": 0.1584029197692871, + "mean_token_accuracy": 0.9460993978381157, + "num_tokens": 7206950.0, + "step": 2450 + }, + { + "entropy": 0.22037068914622068, + "epoch": 6.493827160493828, + "grad_norm": 0.6232843995094299, + "learning_rate": 9.145914303624717e-05, + "loss": 0.15544342041015624, + "mean_token_accuracy": 0.9450622496008872, + "num_tokens": 7359429.0, + "step": 2500 + }, + { + "entropy": 0.2320463878661394, + "epoch": 6.623781676413255, + "grad_norm": 0.7459681630134583, + "learning_rate": 8.560801372831975e-05, + "loss": 0.16350215911865235, + "mean_token_accuracy": 0.9416968420147895, + "num_tokens": 7499281.0, + "step": 2550 + }, + { + "entropy": 0.22943626195192338, + "epoch": 6.753736192332683, + "grad_norm": 0.7482302784919739, + "learning_rate": 7.986530212552506e-05, + "loss": 0.16422538757324218, + "mean_token_accuracy": 0.9434959614276885, + "num_tokens": 7640758.0, + "step": 2600 + }, + { + "entropy": 0.21795938543975354, + "epoch": 6.883690708252112, + "grad_norm": 0.5210486054420471, + "learning_rate": 7.424280806206118e-05, + "loss": 0.15540474891662598, + "mean_token_accuracy": 0.9459306105971337, + "num_tokens": 7791986.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.3157534837149657, + "eval_loss": 0.7744874954223633, + "eval_mean_token_accuracy": 0.8346395036922052, + "eval_num_tokens": 7924980.0, + "eval_runtime": 53.3771, + "eval_samples_per_second": 31.043, + "eval_steps_per_second": 3.897, + "step": 2695 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.4509364053876326e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b458001da9b7a1c6b862e5c5b8b61840518786bd --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3080/trainer_state.json @@ -0,0 +1,732 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 3080, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.2479030179977415, + "epoch": 0.1299545159194282, + "grad_norm": 1.519571304321289, + "learning_rate": 3.522207847653314e-05, + "loss": 2.093206329345703, + "mean_token_accuracy": 0.6068353663384914, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.932415626347065, + "epoch": 0.2599090318388564, + "grad_norm": 1.180830955505371, + "learning_rate": 7.11629748811588e-05, + "loss": 0.8930854797363281, + "mean_token_accuracy": 0.7708445385098457, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.7730373838543891, + "epoch": 0.3898635477582846, + "grad_norm": 0.7839977145195007, + "learning_rate": 0.00010710387128578447, + "loss": 0.7302116394042969, + "mean_token_accuracy": 0.8012136635184288, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6934178560972214, + "epoch": 0.5198180636777128, + "grad_norm": 0.666778564453125, + "learning_rate": 0.0001430447676904101, + "loss": 0.6505754852294922, + "mean_token_accuracy": 0.8195212116837501, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6900296103954315, + "epoch": 0.649772579597141, + "grad_norm": 0.6762415766716003, + "learning_rate": 0.00017898566409503577, + "loss": 0.6378536987304687, + "mean_token_accuracy": 0.8223087686300278, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.667421719878912, + "epoch": 0.7797270955165692, + "grad_norm": 0.5047685503959656, + "learning_rate": 0.00021492656049966144, + "loss": 0.6148524856567383, + "mean_token_accuracy": 0.8280292323231697, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6388977643847465, + "epoch": 0.9096816114359974, + "grad_norm": 0.4360353350639343, + "learning_rate": 0.0002508674569042871, + "loss": 0.5933729553222656, + "mean_token_accuracy": 0.8329134130477905, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6612381511009656, + "eval_loss": 0.6559221744537354, + "eval_mean_token_accuracy": 0.8195324820967821, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.4007, + "eval_samples_per_second": 31.03, + "eval_steps_per_second": 3.895, + "step": 385 + }, + { + "entropy": 0.6224366770916848, + "epoch": 1.0389863547758285, + "grad_norm": 0.5294668078422546, + "learning_rate": 0.00027673375518355765, + "loss": 0.5677951431274414, + "mean_token_accuracy": 0.8380465067211708, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5827466724812984, + "epoch": 1.1689408706952567, + "grad_norm": 0.5172416567802429, + "learning_rate": 0.0002765120122346144, + "loss": 0.5423126983642578, + "mean_token_accuracy": 0.8467991036176682, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5831517253816128, + "epoch": 1.2988953866146848, + "grad_norm": 0.41916292905807495, + "learning_rate": 0.0002760064270819138, + "loss": 0.534448013305664, + "mean_token_accuracy": 0.8456632816791534, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5869986982643605, + "epoch": 1.428849902534113, + "grad_norm": 0.4387759566307068, + "learning_rate": 0.00027521803857633113, + "loss": 0.5367491912841796, + "mean_token_accuracy": 0.8462416216731071, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5771756853163242, + "epoch": 1.5588044184535412, + "grad_norm": 0.49079665541648865, + "learning_rate": 0.00027414846665880935, + "loss": 0.5238623809814453, + "mean_token_accuracy": 0.84760089635849, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.5549105909466744, + "epoch": 1.6887589343729694, + "grad_norm": 0.4000363051891327, + "learning_rate": 0.0002727999090317863, + "loss": 0.510434226989746, + "mean_token_accuracy": 0.8517858856916427, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.583413660377264, + "epoch": 1.8187134502923976, + "grad_norm": 0.33592426776885986, + "learning_rate": 0.00027117513664346674, + "loss": 0.5297993850708008, + "mean_token_accuracy": 0.846615691781044, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5732646904885769, + "epoch": 1.9486679662118258, + "grad_norm": 0.5528839230537415, + "learning_rate": 0.00026927748799421714, + "loss": 0.5219194793701172, + "mean_token_accuracy": 0.8489033079147339, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6027900550801021, + "eval_loss": 0.5946928858757019, + "eval_mean_token_accuracy": 0.8318195798649237, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.3837, + "eval_samples_per_second": 31.039, + "eval_steps_per_second": 3.896, + "step": 770 + }, + { + "entropy": 0.5329899657611272, + "epoch": 2.077972709551657, + "grad_norm": 0.45793575048446655, + "learning_rate": 0.0002671108622767842, + "loss": 0.48420516967773436, + "mean_token_accuracy": 0.8578200301333289, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5142687204480171, + "epoch": 2.207927225471085, + "grad_norm": 0.4690960645675659, + "learning_rate": 0.0002646797113644295, + "loss": 0.4593114471435547, + "mean_token_accuracy": 0.8622670090198516, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5135884112119675, + "epoch": 2.3378817413905133, + "grad_norm": 0.3752821683883667, + "learning_rate": 0.00026198903066344565, + "loss": 0.4626216125488281, + "mean_token_accuracy": 0.8612511262297631, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5137367483973503, + "epoch": 2.4678362573099415, + "grad_norm": 0.3726271390914917, + "learning_rate": 0.0002590443488488465, + "loss": 0.4601683807373047, + "mean_token_accuracy": 0.8620512077212333, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5105714881420136, + "epoch": 2.5977907732293697, + "grad_norm": 0.41296717524528503, + "learning_rate": 0.00025585171650432525, + "loss": 0.46279102325439453, + "mean_token_accuracy": 0.8611763519048691, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5169161760807037, + "epoch": 2.727745289148798, + "grad_norm": 0.4614253044128418, + "learning_rate": 0.0002524176936898197, + "loss": 0.45492774963378907, + "mean_token_accuracy": 0.8627680170536042, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.4989277676492929, + "epoch": 2.857699805068226, + "grad_norm": 0.37512704730033875, + "learning_rate": 0.00024874933646223225, + "loss": 0.4531984329223633, + "mean_token_accuracy": 0.8637665447592735, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5177617704868317, + "epoch": 2.9876543209876543, + "grad_norm": 0.3700532019138336, + "learning_rate": 0.00024485418237699976, + "loss": 0.45844474792480466, + "mean_token_accuracy": 0.8626988258957863, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5253989950108987, + "eval_loss": 0.5857328176498413, + "eval_mean_token_accuracy": 0.8360884573597175, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.341, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.899, + "step": 1155 + }, + { + "entropy": 0.4535361140517134, + "epoch": 3.116959064327485, + "grad_norm": 0.3412795662879944, + "learning_rate": 0.00024074023500030492, + "loss": 0.3942829132080078, + "mean_token_accuracy": 0.8781378038564519, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.44747897461056707, + "epoch": 3.246913580246914, + "grad_norm": 0.46647050976753235, + "learning_rate": 0.0002364159474637521, + "loss": 0.38986759185791015, + "mean_token_accuracy": 0.8777281475067139, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4480265176296234, + "epoch": 3.3768680961663415, + "grad_norm": 0.4068582355976105, + "learning_rate": 0.00023189020509529866, + "loss": 0.39444759368896487, + "mean_token_accuracy": 0.8774515727162361, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.45180007234215736, + "epoch": 3.50682261208577, + "grad_norm": 0.4249928593635559, + "learning_rate": 0.00022717230716213122, + "loss": 0.3977077102661133, + "mean_token_accuracy": 0.8762744688987731, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.4614932192862034, + "epoch": 3.636777128005198, + "grad_norm": 0.561008095741272, + "learning_rate": 0.00022227194776300045, + "loss": 0.4022808456420898, + "mean_token_accuracy": 0.8760285252332687, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4414680179953575, + "epoch": 3.7667316439246266, + "grad_norm": 0.38943538069725037, + "learning_rate": 0.00021719919590927584, + "loss": 0.38586376190185545, + "mean_token_accuracy": 0.8783121705055237, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45685607343912127, + "epoch": 3.8966861598440543, + "grad_norm": 0.5362406969070435, + "learning_rate": 0.00021196447483564875, + "loss": 0.3983576583862305, + "mean_token_accuracy": 0.8764419692754746, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49981127894268584, + "eval_loss": 0.5997208952903748, + "eval_mean_token_accuracy": 0.8368159819107789, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.4304, + "eval_samples_per_second": 31.012, + "eval_steps_per_second": 3.893, + "step": 1540 + }, + { + "entropy": 0.4439909208060509, + "epoch": 4.025990903183885, + "grad_norm": 0.5490113496780396, + "learning_rate": 0.00020657854058299564, + "loss": 0.38307292938232423, + "mean_token_accuracy": 0.8795150506436525, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.3837250977009535, + "epoch": 4.155945419103314, + "grad_norm": 0.5567234754562378, + "learning_rate": 0.0002010524598974076, + "loss": 0.3182963752746582, + "mean_token_accuracy": 0.8964017608761787, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.377872094810009, + "epoch": 4.2858999350227425, + "grad_norm": 0.4315710961818695, + "learning_rate": 0.00019539758749079845, + "loss": 0.318333683013916, + "mean_token_accuracy": 0.8963816618919372, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.38739304527640345, + "epoch": 4.41585445094217, + "grad_norm": 0.49140632152557373, + "learning_rate": 0.00018962554270981555, + "loss": 0.32688804626464846, + "mean_token_accuracy": 0.8937860554456711, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.39157475270330905, + "epoch": 4.545808966861598, + "grad_norm": 0.40667369961738586, + "learning_rate": 0.00018374818566099208, + "loss": 0.3305763626098633, + "mean_token_accuracy": 0.8916732975840569, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.3838599680364132, + "epoch": 4.675763482781027, + "grad_norm": 0.4632417857646942, + "learning_rate": 0.0001777775928411983, + "loss": 0.3267818450927734, + "mean_token_accuracy": 0.8946500706672669, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.38270787581801413, + "epoch": 4.805717998700455, + "grad_norm": 0.5529720187187195, + "learning_rate": 0.0001717260323234649, + "loss": 0.3264235305786133, + "mean_token_accuracy": 0.8948800846934318, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.38736109718680384, + "epoch": 4.935672514619883, + "grad_norm": 0.5604785680770874, + "learning_rate": 0.00016560593854916497, + "loss": 0.3280513381958008, + "mean_token_accuracy": 0.8931388029456139, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4370518812479881, + "eval_loss": 0.6210553050041199, + "eval_mean_token_accuracy": 0.8379779781859654, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.4039, + "eval_samples_per_second": 31.028, + "eval_steps_per_second": 3.895, + "step": 1925 + }, + { + "entropy": 0.3330705868988181, + "epoch": 5.064977257959714, + "grad_norm": 0.5386723875999451, + "learning_rate": 0.0001594298867783512, + "loss": 0.2754818344116211, + "mean_token_accuracy": 0.9101201346771202, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.2980620255321264, + "epoch": 5.1949317738791425, + "grad_norm": 0.5633581876754761, + "learning_rate": 0.00015321056725074549, + "loss": 0.23754241943359375, + "mean_token_accuracy": 0.9203532826900482, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.3111592583358288, + "epoch": 5.32488628979857, + "grad_norm": 0.5031015872955322, + "learning_rate": 0.0001469607591104745, + "loss": 0.24428102493286133, + "mean_token_accuracy": 0.917181601524353, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.31522042460739613, + "epoch": 5.454840805717999, + "grad_norm": 0.6432453393936157, + "learning_rate": 0.0001406933041481286, + "loss": 0.25112478256225584, + "mean_token_accuracy": 0.9152472382783889, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.3046229027956724, + "epoch": 5.584795321637427, + "grad_norm": 0.5104537606239319, + "learning_rate": 0.00013442108041409814, + "loss": 0.2431495475769043, + "mean_token_accuracy": 0.917829519212246, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.30440517760813235, + "epoch": 5.714749837556855, + "grad_norm": 0.5307765603065491, + "learning_rate": 0.0001281569757574053, + "loss": 0.24610313415527343, + "mean_token_accuracy": 0.9166415151953697, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.304359400421381, + "epoch": 5.844704353476283, + "grad_norm": 0.5014523267745972, + "learning_rate": 0.00012191386134440133, + "loss": 0.24548973083496095, + "mean_token_accuracy": 0.9165477308630944, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.3141418205201626, + "epoch": 5.974658869395712, + "grad_norm": 0.567398726940155, + "learning_rate": 0.00011570456521174339, + "loss": 0.24975168228149414, + "mean_token_accuracy": 0.9139353120326996, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3758909202252443, + "eval_loss": 0.6843022108078003, + "eval_mean_token_accuracy": 0.8348075449466705, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.3825, + "eval_samples_per_second": 31.04, + "eval_steps_per_second": 3.896, + "step": 2310 + }, + { + "entropy": 0.24346149432000203, + "epoch": 6.1039636127355426, + "grad_norm": 0.7140825986862183, + "learning_rate": 0.00010954184590799172, + "loss": 0.17231273651123047, + "mean_token_accuracy": 0.9407721275660261, + "num_tokens": 6909578.0, + "step": 2350 + }, + { + "entropy": 0.2160973483324051, + "epoch": 6.23391812865497, + "grad_norm": 0.49014952778816223, + "learning_rate": 0.00010343836627798716, + "loss": 0.15455107688903807, + "mean_token_accuracy": 0.9467655989527702, + "num_tokens": 7056244.0, + "step": 2400 + }, + { + "entropy": 0.21491924367845058, + "epoch": 6.363872644574399, + "grad_norm": 0.5529471635818481, + "learning_rate": 9.740666744387656e-05, + "loss": 0.1584029197692871, + "mean_token_accuracy": 0.9460993978381157, + "num_tokens": 7206950.0, + "step": 2450 + }, + { + "entropy": 0.22037068914622068, + "epoch": 6.493827160493828, + "grad_norm": 0.6232843995094299, + "learning_rate": 9.145914303624717e-05, + "loss": 0.15544342041015624, + "mean_token_accuracy": 0.9450622496008872, + "num_tokens": 7359429.0, + "step": 2500 + }, + { + "entropy": 0.2320463878661394, + "epoch": 6.623781676413255, + "grad_norm": 0.7459681630134583, + "learning_rate": 8.560801372831975e-05, + "loss": 0.16350215911865235, + "mean_token_accuracy": 0.9416968420147895, + "num_tokens": 7499281.0, + "step": 2550 + }, + { + "entropy": 0.22943626195192338, + "epoch": 6.753736192332683, + "grad_norm": 0.7482302784919739, + "learning_rate": 7.986530212552506e-05, + "loss": 0.16422538757324218, + "mean_token_accuracy": 0.9434959614276885, + "num_tokens": 7640758.0, + "step": 2600 + }, + { + "entropy": 0.21795938543975354, + "epoch": 6.883690708252112, + "grad_norm": 0.5210486054420471, + "learning_rate": 7.424280806206118e-05, + "loss": 0.15540474891662598, + "mean_token_accuracy": 0.9459306105971337, + "num_tokens": 7791986.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.3157534837149657, + "eval_loss": 0.7744874954223633, + "eval_mean_token_accuracy": 0.8346395036922052, + "eval_num_tokens": 7924980.0, + "eval_runtime": 53.3771, + "eval_samples_per_second": 31.043, + "eval_steps_per_second": 3.897, + "step": 2695 + }, + { + "entropy": 0.2124774060656677, + "epoch": 7.012995451591943, + "grad_norm": 0.42238789796829224, + "learning_rate": 6.875208435518865e-05, + "loss": 0.14792531967163086, + "mean_token_accuracy": 0.9490461115861059, + "num_tokens": 7940521.0, + "step": 2700 + }, + { + "entropy": 0.15692965138703585, + "epoch": 7.142949967511371, + "grad_norm": 0.4711572229862213, + "learning_rate": 6.340441306708468e-05, + "loss": 0.09051708221435546, + "mean_token_accuracy": 0.9700166273117066, + "num_tokens": 8084193.0, + "step": 2750 + }, + { + "entropy": 0.15241683423519134, + "epoch": 7.272904483430799, + "grad_norm": 0.4312196671962738, + "learning_rate": 5.821078232303016e-05, + "loss": 0.08812363624572754, + "mean_token_accuracy": 0.9699361199140548, + "num_tokens": 8230159.0, + "step": 2800 + }, + { + "entropy": 0.1459079357981682, + "epoch": 7.402858999350228, + "grad_norm": 0.4804084002971649, + "learning_rate": 5.3181863733564636e-05, + "loss": 0.08675944328308105, + "mean_token_accuracy": 0.9703072866797448, + "num_tokens": 8380556.0, + "step": 2850 + }, + { + "entropy": 0.15621621005237102, + "epoch": 7.532813515269655, + "grad_norm": 0.5435478091239929, + "learning_rate": 4.83279904669986e-05, + "loss": 0.09016354560852051, + "mean_token_accuracy": 0.9674961140751839, + "num_tokens": 8523248.0, + "step": 2900 + }, + { + "entropy": 0.1545175113901496, + "epoch": 7.662768031189084, + "grad_norm": 0.524286687374115, + "learning_rate": 4.365913601734056e-05, + "loss": 0.09049141883850098, + "mean_token_accuracy": 0.9679373624920845, + "num_tokens": 8672002.0, + "step": 2950 + }, + { + "entropy": 0.1553485019877553, + "epoch": 7.792722547108512, + "grad_norm": 0.5006484389305115, + "learning_rate": 3.9184893711264495e-05, + "loss": 0.08913107872009278, + "mean_token_accuracy": 0.9684559822082519, + "num_tokens": 8816090.0, + "step": 3000 + }, + { + "entropy": 0.15444331549108029, + "epoch": 7.92267706302794, + "grad_norm": 0.5613893866539001, + "learning_rate": 3.491445699622611e-05, + "loss": 0.08711207389831543, + "mean_token_accuracy": 0.9684525722265244, + "num_tokens": 8966004.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.26580205430778175, + "eval_loss": 0.8996392488479614, + "eval_mean_token_accuracy": 0.8319417791297803, + "eval_num_tokens": 9057120.0, + "eval_runtime": 53.4042, + "eval_samples_per_second": 31.028, + "eval_steps_per_second": 3.895, + "step": 3080 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.9430479532657766e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7b923a301af4113e0aa591d097678b1fa73025c --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.009078376988692594, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3465/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b3a82529336a850c0920f00526bb42ba8af18aa --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/README.md @@ -0,0 +1,58 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: transformers +model_name: Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2 +tags: +- generated_from_trainer +- sft +- trl +licence: license +--- + +# Model Card for Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2 + +This model is a fine-tuned version of [Qwen/Qwen3-4B-Base](https://huggingface.co/Qwen/Qwen3-4B-Base). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/katriin-kukk/Cross_lingual_morphological_generalization/runs/5wprubge) + + + +This model was trained with SFT. + +### Framework versions + +- TRL: 0.29.0 +- Transformers: 5.5.4 +- Pytorch: 2.10.0 +- Datasets: 4.6.1 +- Tokenizers: 0.22.2 + +## Citations + + + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bd57f3f622bb596137feb69bb7bad1974a744b06 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1155/trainer_state.json @@ -0,0 +1,297 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1155, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.562044974242181e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..262b7d0cdecdbe75f2f10e1feb40b561000e7995 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1540/trainer_state.json @@ -0,0 +1,378 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1540, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.079894291063982e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a73542f44218b0aa746a5a4d2985b6423009d92b --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-1925/trainer_state.json @@ -0,0 +1,469 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1925, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + }, + { + "entropy": 0.4349768550401956, + "epoch": 4.025990903183885, + "grad_norm": 0.38304343819618225, + "learning_rate": 0.0003297505471904706, + "loss": 0.38982112884521486, + "mean_token_accuracy": 0.8773354097227355, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.35551133938133717, + "epoch": 4.155945419103314, + "grad_norm": 0.36798641085624695, + "learning_rate": 0.00032092955288608274, + "loss": 0.30814620971679685, + "mean_token_accuracy": 0.8983125445246697, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.3554606523364782, + "epoch": 4.2858999350227425, + "grad_norm": 0.35416939854621887, + "learning_rate": 0.0003119029750764555, + "loss": 0.3161496162414551, + "mean_token_accuracy": 0.8964688742160797, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.3676067052781582, + "epoch": 4.41585445094217, + "grad_norm": 0.2962980568408966, + "learning_rate": 0.00030268936111846394, + "loss": 0.3259954071044922, + "mean_token_accuracy": 0.8930631306767464, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.37352351650595667, + "epoch": 4.545808966861598, + "grad_norm": 0.3697018325328827, + "learning_rate": 0.0002933076426814288, + "loss": 0.33234176635742185, + "mean_token_accuracy": 0.8911465045809746, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.36930313020944594, + "epoch": 4.675763482781027, + "grad_norm": 0.3185271620750427, + "learning_rate": 0.00028377709684727665, + "loss": 0.3297584533691406, + "mean_token_accuracy": 0.8921913403272629, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.3608156970143318, + "epoch": 4.805717998700455, + "grad_norm": 0.411017507314682, + "learning_rate": 0.0002741173065009647, + "loss": 0.3253137969970703, + "mean_token_accuracy": 0.8944574344158173, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.3674951885640621, + "epoch": 4.935672514619883, + "grad_norm": 0.3353460431098938, + "learning_rate": 0.0002643481200925555, + "loss": 0.3277518081665039, + "mean_token_accuracy": 0.8909049332141876, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4197400872810529, + "eval_loss": 0.618202269077301, + "eval_mean_token_accuracy": 0.8406766191698037, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.1536, + "eval_samples_per_second": 31.174, + "eval_steps_per_second": 3.913, + "step": 1925 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.5983628828964045e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3029924fa0255ebce234932e13996a2f2e547e43 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2310/trainer_state.json @@ -0,0 +1,560 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 2310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + }, + { + "entropy": 0.4349768550401956, + "epoch": 4.025990903183885, + "grad_norm": 0.38304343819618225, + "learning_rate": 0.0003297505471904706, + "loss": 0.38982112884521486, + "mean_token_accuracy": 0.8773354097227355, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.35551133938133717, + "epoch": 4.155945419103314, + "grad_norm": 0.36798641085624695, + "learning_rate": 0.00032092955288608274, + "loss": 0.30814620971679685, + "mean_token_accuracy": 0.8983125445246697, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.3554606523364782, + "epoch": 4.2858999350227425, + "grad_norm": 0.35416939854621887, + "learning_rate": 0.0003119029750764555, + "loss": 0.3161496162414551, + "mean_token_accuracy": 0.8964688742160797, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.3676067052781582, + "epoch": 4.41585445094217, + "grad_norm": 0.2962980568408966, + "learning_rate": 0.00030268936111846394, + "loss": 0.3259954071044922, + "mean_token_accuracy": 0.8930631306767464, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.37352351650595667, + "epoch": 4.545808966861598, + "grad_norm": 0.3697018325328827, + "learning_rate": 0.0002933076426814288, + "loss": 0.33234176635742185, + "mean_token_accuracy": 0.8911465045809746, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.36930313020944594, + "epoch": 4.675763482781027, + "grad_norm": 0.3185271620750427, + "learning_rate": 0.00028377709684727665, + "loss": 0.3297584533691406, + "mean_token_accuracy": 0.8921913403272629, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.3608156970143318, + "epoch": 4.805717998700455, + "grad_norm": 0.411017507314682, + "learning_rate": 0.0002741173065009647, + "loss": 0.3253137969970703, + "mean_token_accuracy": 0.8944574344158173, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.3674951885640621, + "epoch": 4.935672514619883, + "grad_norm": 0.3353460431098938, + "learning_rate": 0.0002643481200925555, + "loss": 0.3277518081665039, + "mean_token_accuracy": 0.8909049332141876, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4197400872810529, + "eval_loss": 0.618202269077301, + "eval_mean_token_accuracy": 0.8406766191698037, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.1536, + "eval_samples_per_second": 31.174, + "eval_steps_per_second": 3.913, + "step": 1925 + }, + { + "entropy": 0.30353270531000204, + "epoch": 5.064977257959714, + "grad_norm": 0.32264357805252075, + "learning_rate": 0.00025448961085362376, + "loss": 0.2661189651489258, + "mean_token_accuracy": 0.9130437371119782, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.251540619507432, + "epoch": 5.1949317738791425, + "grad_norm": 0.3971196711063385, + "learning_rate": 0.0002445620355517912, + "loss": 0.215250244140625, + "mean_token_accuracy": 0.9263233968615532, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.2709880671650171, + "epoch": 5.32488628979857, + "grad_norm": 0.3659353256225586, + "learning_rate": 0.00023458579286814304, + "loss": 0.22618688583374025, + "mean_token_accuracy": 0.9207352563738823, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.27484370954334736, + "epoch": 5.454840805717999, + "grad_norm": 0.4071715176105499, + "learning_rate": 0.00022458138148304628, + "loss": 0.23481584548950196, + "mean_token_accuracy": 0.9185729047656059, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.26769588187336923, + "epoch": 5.584795321637427, + "grad_norm": 0.3493686318397522, + "learning_rate": 0.00021456935795649494, + "loss": 0.23087779998779298, + "mean_token_accuracy": 0.9200295981764793, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.26875538341701033, + "epoch": 5.714749837556855, + "grad_norm": 0.3845248520374298, + "learning_rate": 0.0002045702944895277, + "loss": 0.23075325012207032, + "mean_token_accuracy": 0.9196864122152328, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.2734881558269262, + "epoch": 5.844704353476283, + "grad_norm": 0.3585197329521179, + "learning_rate": 0.00019460473665350612, + "loss": 0.23366186141967774, + "mean_token_accuracy": 0.9188175854086876, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.27908188320696353, + "epoch": 5.974658869395712, + "grad_norm": 0.3747495114803314, + "learning_rate": 0.00018469316117411113, + "loss": 0.23512376785278322, + "mean_token_accuracy": 0.9161470046639443, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3402500281540247, + "eval_loss": 0.7022264003753662, + "eval_mean_token_accuracy": 0.8379392472024147, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.1771, + "eval_samples_per_second": 31.16, + "eval_steps_per_second": 3.911, + "step": 2310 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.116787508073994e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5498e370237e4ce81bdf9812bdba4381f2b8b454 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-2695/trainer_state.json @@ -0,0 +1,641 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 2695, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + }, + { + "entropy": 0.4349768550401956, + "epoch": 4.025990903183885, + "grad_norm": 0.38304343819618225, + "learning_rate": 0.0003297505471904706, + "loss": 0.38982112884521486, + "mean_token_accuracy": 0.8773354097227355, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.35551133938133717, + "epoch": 4.155945419103314, + "grad_norm": 0.36798641085624695, + "learning_rate": 0.00032092955288608274, + "loss": 0.30814620971679685, + "mean_token_accuracy": 0.8983125445246697, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.3554606523364782, + "epoch": 4.2858999350227425, + "grad_norm": 0.35416939854621887, + "learning_rate": 0.0003119029750764555, + "loss": 0.3161496162414551, + "mean_token_accuracy": 0.8964688742160797, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.3676067052781582, + "epoch": 4.41585445094217, + "grad_norm": 0.2962980568408966, + "learning_rate": 0.00030268936111846394, + "loss": 0.3259954071044922, + "mean_token_accuracy": 0.8930631306767464, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.37352351650595667, + "epoch": 4.545808966861598, + "grad_norm": 0.3697018325328827, + "learning_rate": 0.0002933076426814288, + "loss": 0.33234176635742185, + "mean_token_accuracy": 0.8911465045809746, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.36930313020944594, + "epoch": 4.675763482781027, + "grad_norm": 0.3185271620750427, + "learning_rate": 0.00028377709684727665, + "loss": 0.3297584533691406, + "mean_token_accuracy": 0.8921913403272629, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.3608156970143318, + "epoch": 4.805717998700455, + "grad_norm": 0.411017507314682, + "learning_rate": 0.0002741173065009647, + "loss": 0.3253137969970703, + "mean_token_accuracy": 0.8944574344158173, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.3674951885640621, + "epoch": 4.935672514619883, + "grad_norm": 0.3353460431098938, + "learning_rate": 0.0002643481200925555, + "loss": 0.3277518081665039, + "mean_token_accuracy": 0.8909049332141876, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4197400872810529, + "eval_loss": 0.618202269077301, + "eval_mean_token_accuracy": 0.8406766191698037, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.1536, + "eval_samples_per_second": 31.174, + "eval_steps_per_second": 3.913, + "step": 1925 + }, + { + "entropy": 0.30353270531000204, + "epoch": 5.064977257959714, + "grad_norm": 0.32264357805252075, + "learning_rate": 0.00025448961085362376, + "loss": 0.2661189651489258, + "mean_token_accuracy": 0.9130437371119782, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.251540619507432, + "epoch": 5.1949317738791425, + "grad_norm": 0.3971196711063385, + "learning_rate": 0.0002445620355517912, + "loss": 0.215250244140625, + "mean_token_accuracy": 0.9263233968615532, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.2709880671650171, + "epoch": 5.32488628979857, + "grad_norm": 0.3659353256225586, + "learning_rate": 0.00023458579286814304, + "loss": 0.22618688583374025, + "mean_token_accuracy": 0.9207352563738823, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.27484370954334736, + "epoch": 5.454840805717999, + "grad_norm": 0.4071715176105499, + "learning_rate": 0.00022458138148304628, + "loss": 0.23481584548950196, + "mean_token_accuracy": 0.9185729047656059, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.26769588187336923, + "epoch": 5.584795321637427, + "grad_norm": 0.3493686318397522, + "learning_rate": 0.00021456935795649494, + "loss": 0.23087779998779298, + "mean_token_accuracy": 0.9200295981764793, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.26875538341701033, + "epoch": 5.714749837556855, + "grad_norm": 0.3845248520374298, + "learning_rate": 0.0002045702944895277, + "loss": 0.23075325012207032, + "mean_token_accuracy": 0.9196864122152328, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.2734881558269262, + "epoch": 5.844704353476283, + "grad_norm": 0.3585197329521179, + "learning_rate": 0.00019460473665350612, + "loss": 0.23366186141967774, + "mean_token_accuracy": 0.9188175854086876, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.27908188320696353, + "epoch": 5.974658869395712, + "grad_norm": 0.3747495114803314, + "learning_rate": 0.00018469316117411113, + "loss": 0.23512376785278322, + "mean_token_accuracy": 0.9161470046639443, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3402500281540247, + "eval_loss": 0.7022264003753662, + "eval_mean_token_accuracy": 0.8379392472024147, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.1771, + "eval_samples_per_second": 31.16, + "eval_steps_per_second": 3.911, + "step": 2310 + }, + { + "entropy": 0.19567224593018767, + "epoch": 6.1039636127355426, + "grad_norm": 0.34765154123306274, + "learning_rate": 0.0001748559338567996, + "loss": 0.14980849266052246, + "mean_token_accuracy": 0.9468569485985454, + "num_tokens": 6909578.0, + "step": 2350 + }, + { + "entropy": 0.17214293472468853, + "epoch": 6.23391812865497, + "grad_norm": 0.2610681354999542, + "learning_rate": 0.0001651132677401741, + "loss": 0.13241591453552246, + "mean_token_accuracy": 0.9536613565683365, + "num_tokens": 7056244.0, + "step": 2400 + }, + { + "entropy": 0.1707596355304122, + "epoch": 6.363872644574399, + "grad_norm": 0.3031822144985199, + "learning_rate": 0.00015548518156325097, + "loss": 0.13481686592102052, + "mean_token_accuracy": 0.9523171505331993, + "num_tokens": 7206950.0, + "step": 2450 + }, + { + "entropy": 0.1729374410584569, + "epoch": 6.493827160493828, + "grad_norm": 0.3582129180431366, + "learning_rate": 0.00014599145863196456, + "loss": 0.13048507690429687, + "mean_token_accuracy": 0.9524129882454873, + "num_tokens": 7359429.0, + "step": 2500 + }, + { + "entropy": 0.18609228238463402, + "epoch": 6.623781676413255, + "grad_norm": 0.4041096270084381, + "learning_rate": 0.00013665160616942816, + "loss": 0.1391014862060547, + "mean_token_accuracy": 0.9490628919005394, + "num_tokens": 7499281.0, + "step": 2550 + }, + { + "entropy": 0.18581669881939888, + "epoch": 6.753736192332683, + "grad_norm": 0.49577364325523376, + "learning_rate": 0.0001274848152334751, + "loss": 0.14069243431091308, + "mean_token_accuracy": 0.949679699242115, + "num_tokens": 7640758.0, + "step": 2600 + }, + { + "entropy": 0.1762319504469633, + "epoch": 6.883690708252112, + "grad_norm": 0.3717825412750244, + "learning_rate": 0.0001185099212838419, + "loss": 0.13402896881103515, + "mean_token_accuracy": 0.9521244546771049, + "num_tokens": 7791986.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.26854762040938324, + "eval_loss": 0.821937084197998, + "eval_mean_token_accuracy": 0.8360469177938424, + "eval_num_tokens": 7924980.0, + "eval_runtime": 53.2166, + "eval_samples_per_second": 31.137, + "eval_steps_per_second": 3.909, + "step": 2695 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.635798199405896e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1eb728fd9cca93fab585c77315fdb0314d6020 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3080/trainer_state.json @@ -0,0 +1,732 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 3080, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + }, + { + "entropy": 0.4349768550401956, + "epoch": 4.025990903183885, + "grad_norm": 0.38304343819618225, + "learning_rate": 0.0003297505471904706, + "loss": 0.38982112884521486, + "mean_token_accuracy": 0.8773354097227355, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.35551133938133717, + "epoch": 4.155945419103314, + "grad_norm": 0.36798641085624695, + "learning_rate": 0.00032092955288608274, + "loss": 0.30814620971679685, + "mean_token_accuracy": 0.8983125445246697, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.3554606523364782, + "epoch": 4.2858999350227425, + "grad_norm": 0.35416939854621887, + "learning_rate": 0.0003119029750764555, + "loss": 0.3161496162414551, + "mean_token_accuracy": 0.8964688742160797, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.3676067052781582, + "epoch": 4.41585445094217, + "grad_norm": 0.2962980568408966, + "learning_rate": 0.00030268936111846394, + "loss": 0.3259954071044922, + "mean_token_accuracy": 0.8930631306767464, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.37352351650595667, + "epoch": 4.545808966861598, + "grad_norm": 0.3697018325328827, + "learning_rate": 0.0002933076426814288, + "loss": 0.33234176635742185, + "mean_token_accuracy": 0.8911465045809746, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.36930313020944594, + "epoch": 4.675763482781027, + "grad_norm": 0.3185271620750427, + "learning_rate": 0.00028377709684727665, + "loss": 0.3297584533691406, + "mean_token_accuracy": 0.8921913403272629, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.3608156970143318, + "epoch": 4.805717998700455, + "grad_norm": 0.411017507314682, + "learning_rate": 0.0002741173065009647, + "loss": 0.3253137969970703, + "mean_token_accuracy": 0.8944574344158173, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.3674951885640621, + "epoch": 4.935672514619883, + "grad_norm": 0.3353460431098938, + "learning_rate": 0.0002643481200925555, + "loss": 0.3277518081665039, + "mean_token_accuracy": 0.8909049332141876, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4197400872810529, + "eval_loss": 0.618202269077301, + "eval_mean_token_accuracy": 0.8406766191698037, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.1536, + "eval_samples_per_second": 31.174, + "eval_steps_per_second": 3.913, + "step": 1925 + }, + { + "entropy": 0.30353270531000204, + "epoch": 5.064977257959714, + "grad_norm": 0.32264357805252075, + "learning_rate": 0.00025448961085362376, + "loss": 0.2661189651489258, + "mean_token_accuracy": 0.9130437371119782, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.251540619507432, + "epoch": 5.1949317738791425, + "grad_norm": 0.3971196711063385, + "learning_rate": 0.0002445620355517912, + "loss": 0.215250244140625, + "mean_token_accuracy": 0.9263233968615532, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.2709880671650171, + "epoch": 5.32488628979857, + "grad_norm": 0.3659353256225586, + "learning_rate": 0.00023458579286814304, + "loss": 0.22618688583374025, + "mean_token_accuracy": 0.9207352563738823, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.27484370954334736, + "epoch": 5.454840805717999, + "grad_norm": 0.4071715176105499, + "learning_rate": 0.00022458138148304628, + "loss": 0.23481584548950196, + "mean_token_accuracy": 0.9185729047656059, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.26769588187336923, + "epoch": 5.584795321637427, + "grad_norm": 0.3493686318397522, + "learning_rate": 0.00021456935795649494, + "loss": 0.23087779998779298, + "mean_token_accuracy": 0.9200295981764793, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.26875538341701033, + "epoch": 5.714749837556855, + "grad_norm": 0.3845248520374298, + "learning_rate": 0.0002045702944895277, + "loss": 0.23075325012207032, + "mean_token_accuracy": 0.9196864122152328, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.2734881558269262, + "epoch": 5.844704353476283, + "grad_norm": 0.3585197329521179, + "learning_rate": 0.00019460473665350612, + "loss": 0.23366186141967774, + "mean_token_accuracy": 0.9188175854086876, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.27908188320696353, + "epoch": 5.974658869395712, + "grad_norm": 0.3747495114803314, + "learning_rate": 0.00018469316117411113, + "loss": 0.23512376785278322, + "mean_token_accuracy": 0.9161470046639443, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3402500281540247, + "eval_loss": 0.7022264003753662, + "eval_mean_token_accuracy": 0.8379392472024147, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.1771, + "eval_samples_per_second": 31.16, + "eval_steps_per_second": 3.911, + "step": 2310 + }, + { + "entropy": 0.19567224593018767, + "epoch": 6.1039636127355426, + "grad_norm": 0.34765154123306274, + "learning_rate": 0.0001748559338567996, + "loss": 0.14980849266052246, + "mean_token_accuracy": 0.9468569485985454, + "num_tokens": 6909578.0, + "step": 2350 + }, + { + "entropy": 0.17214293472468853, + "epoch": 6.23391812865497, + "grad_norm": 0.2610681354999542, + "learning_rate": 0.0001651132677401741, + "loss": 0.13241591453552246, + "mean_token_accuracy": 0.9536613565683365, + "num_tokens": 7056244.0, + "step": 2400 + }, + { + "entropy": 0.1707596355304122, + "epoch": 6.363872644574399, + "grad_norm": 0.3031822144985199, + "learning_rate": 0.00015548518156325097, + "loss": 0.13481686592102052, + "mean_token_accuracy": 0.9523171505331993, + "num_tokens": 7206950.0, + "step": 2450 + }, + { + "entropy": 0.1729374410584569, + "epoch": 6.493827160493828, + "grad_norm": 0.3582129180431366, + "learning_rate": 0.00014599145863196456, + "loss": 0.13048507690429687, + "mean_token_accuracy": 0.9524129882454873, + "num_tokens": 7359429.0, + "step": 2500 + }, + { + "entropy": 0.18609228238463402, + "epoch": 6.623781676413255, + "grad_norm": 0.4041096270084381, + "learning_rate": 0.00013665160616942816, + "loss": 0.1391014862060547, + "mean_token_accuracy": 0.9490628919005394, + "num_tokens": 7499281.0, + "step": 2550 + }, + { + "entropy": 0.18581669881939888, + "epoch": 6.753736192332683, + "grad_norm": 0.49577364325523376, + "learning_rate": 0.0001274848152334751, + "loss": 0.14069243431091308, + "mean_token_accuracy": 0.949679699242115, + "num_tokens": 7640758.0, + "step": 2600 + }, + { + "entropy": 0.1762319504469633, + "epoch": 6.883690708252112, + "grad_norm": 0.3717825412750244, + "learning_rate": 0.0001185099212838419, + "loss": 0.13402896881103515, + "mean_token_accuracy": 0.9521244546771049, + "num_tokens": 7791986.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.26854762040938324, + "eval_loss": 0.821937084197998, + "eval_mean_token_accuracy": 0.8360469177938424, + "eval_num_tokens": 7924980.0, + "eval_runtime": 53.2166, + "eval_samples_per_second": 31.137, + "eval_steps_per_second": 3.909, + "step": 2695 + }, + { + "entropy": 0.16884614399929143, + "epoch": 7.012995451591943, + "grad_norm": 0.1985609084367752, + "learning_rate": 0.00010974536548001442, + "loss": 0.1253983211517334, + "mean_token_accuracy": 0.9559567993010708, + "num_tokens": 7940521.0, + "step": 2700 + }, + { + "entropy": 0.11399174220860005, + "epoch": 7.142949967511371, + "grad_norm": 0.24280235171318054, + "learning_rate": 0.00010120915678926424, + "loss": 0.07573186874389648, + "mean_token_accuracy": 0.9740175333619118, + "num_tokens": 8084193.0, + "step": 2750 + }, + { + "entropy": 0.11322081722319126, + "epoch": 7.272904483430799, + "grad_norm": 0.3016868233680725, + "learning_rate": 9.291883498273002e-05, + "loss": 0.0749389362335205, + "mean_token_accuracy": 0.9736263358592987, + "num_tokens": 8230159.0, + "step": 2800 + }, + { + "entropy": 0.10975711965933442, + "epoch": 7.402858999350228, + "grad_norm": 0.2391500324010849, + "learning_rate": 8.489143459558115e-05, + "loss": 0.07450803756713867, + "mean_token_accuracy": 0.9741762459278107, + "num_tokens": 8380556.0, + "step": 2850 + }, + { + "entropy": 0.11804175904020667, + "epoch": 7.532813515269655, + "grad_norm": 0.24333180487155914, + "learning_rate": 7.714344992531333e-05, + "loss": 0.07613008975982666, + "mean_token_accuracy": 0.9720202484726905, + "num_tokens": 8523248.0, + "step": 2900 + }, + { + "entropy": 0.11418169999495148, + "epoch": 7.662768031189084, + "grad_norm": 0.26805493235588074, + "learning_rate": 6.969080114009809e-05, + "loss": 0.0742732048034668, + "mean_token_accuracy": 0.9728452184796333, + "num_tokens": 8672002.0, + "step": 2950 + }, + { + "entropy": 0.11600488040596246, + "epoch": 7.792722547108512, + "grad_norm": 0.30010196566581726, + "learning_rate": 6.254880156682401e-05, + "loss": 0.0739283561706543, + "mean_token_accuracy": 0.9734612467885018, + "num_tokens": 8816090.0, + "step": 3000 + }, + { + "entropy": 0.11802038468420506, + "epoch": 7.92267706302794, + "grad_norm": 0.25391119718551636, + "learning_rate": 5.5732126226045195e-05, + "loss": 0.07241737365722656, + "mean_token_accuracy": 0.9727071779966354, + "num_tokens": 8966004.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.21782970414138758, + "eval_loss": 0.9648962616920471, + "eval_mean_token_accuracy": 0.8366291666260133, + "eval_num_tokens": 9057120.0, + "eval_runtime": 53.1636, + "eval_samples_per_second": 31.168, + "eval_steps_per_second": 3.912, + "step": 3080 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.154271468542024e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86590449cf1687e2ecc7fa2acd84e1cba6ec8e1a --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3465/trainer_state.json @@ -0,0 +1,823 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 3465, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + }, + { + "entropy": 0.4349768550401956, + "epoch": 4.025990903183885, + "grad_norm": 0.38304343819618225, + "learning_rate": 0.0003297505471904706, + "loss": 0.38982112884521486, + "mean_token_accuracy": 0.8773354097227355, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.35551133938133717, + "epoch": 4.155945419103314, + "grad_norm": 0.36798641085624695, + "learning_rate": 0.00032092955288608274, + "loss": 0.30814620971679685, + "mean_token_accuracy": 0.8983125445246697, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.3554606523364782, + "epoch": 4.2858999350227425, + "grad_norm": 0.35416939854621887, + "learning_rate": 0.0003119029750764555, + "loss": 0.3161496162414551, + "mean_token_accuracy": 0.8964688742160797, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.3676067052781582, + "epoch": 4.41585445094217, + "grad_norm": 0.2962980568408966, + "learning_rate": 0.00030268936111846394, + "loss": 0.3259954071044922, + "mean_token_accuracy": 0.8930631306767464, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.37352351650595667, + "epoch": 4.545808966861598, + "grad_norm": 0.3697018325328827, + "learning_rate": 0.0002933076426814288, + "loss": 0.33234176635742185, + "mean_token_accuracy": 0.8911465045809746, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.36930313020944594, + "epoch": 4.675763482781027, + "grad_norm": 0.3185271620750427, + "learning_rate": 0.00028377709684727665, + "loss": 0.3297584533691406, + "mean_token_accuracy": 0.8921913403272629, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.3608156970143318, + "epoch": 4.805717998700455, + "grad_norm": 0.411017507314682, + "learning_rate": 0.0002741173065009647, + "loss": 0.3253137969970703, + "mean_token_accuracy": 0.8944574344158173, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.3674951885640621, + "epoch": 4.935672514619883, + "grad_norm": 0.3353460431098938, + "learning_rate": 0.0002643481200925555, + "loss": 0.3277518081665039, + "mean_token_accuracy": 0.8909049332141876, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4197400872810529, + "eval_loss": 0.618202269077301, + "eval_mean_token_accuracy": 0.8406766191698037, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.1536, + "eval_samples_per_second": 31.174, + "eval_steps_per_second": 3.913, + "step": 1925 + }, + { + "entropy": 0.30353270531000204, + "epoch": 5.064977257959714, + "grad_norm": 0.32264357805252075, + "learning_rate": 0.00025448961085362376, + "loss": 0.2661189651489258, + "mean_token_accuracy": 0.9130437371119782, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.251540619507432, + "epoch": 5.1949317738791425, + "grad_norm": 0.3971196711063385, + "learning_rate": 0.0002445620355517912, + "loss": 0.215250244140625, + "mean_token_accuracy": 0.9263233968615532, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.2709880671650171, + "epoch": 5.32488628979857, + "grad_norm": 0.3659353256225586, + "learning_rate": 0.00023458579286814304, + "loss": 0.22618688583374025, + "mean_token_accuracy": 0.9207352563738823, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.27484370954334736, + "epoch": 5.454840805717999, + "grad_norm": 0.4071715176105499, + "learning_rate": 0.00022458138148304628, + "loss": 0.23481584548950196, + "mean_token_accuracy": 0.9185729047656059, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.26769588187336923, + "epoch": 5.584795321637427, + "grad_norm": 0.3493686318397522, + "learning_rate": 0.00021456935795649494, + "loss": 0.23087779998779298, + "mean_token_accuracy": 0.9200295981764793, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.26875538341701033, + "epoch": 5.714749837556855, + "grad_norm": 0.3845248520374298, + "learning_rate": 0.0002045702944895277, + "loss": 0.23075325012207032, + "mean_token_accuracy": 0.9196864122152328, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.2734881558269262, + "epoch": 5.844704353476283, + "grad_norm": 0.3585197329521179, + "learning_rate": 0.00019460473665350612, + "loss": 0.23366186141967774, + "mean_token_accuracy": 0.9188175854086876, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.27908188320696353, + "epoch": 5.974658869395712, + "grad_norm": 0.3747495114803314, + "learning_rate": 0.00018469316117411113, + "loss": 0.23512376785278322, + "mean_token_accuracy": 0.9161470046639443, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3402500281540247, + "eval_loss": 0.7022264003753662, + "eval_mean_token_accuracy": 0.8379392472024147, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.1771, + "eval_samples_per_second": 31.16, + "eval_steps_per_second": 3.911, + "step": 2310 + }, + { + "entropy": 0.19567224593018767, + "epoch": 6.1039636127355426, + "grad_norm": 0.34765154123306274, + "learning_rate": 0.0001748559338567996, + "loss": 0.14980849266052246, + "mean_token_accuracy": 0.9468569485985454, + "num_tokens": 6909578.0, + "step": 2350 + }, + { + "entropy": 0.17214293472468853, + "epoch": 6.23391812865497, + "grad_norm": 0.2610681354999542, + "learning_rate": 0.0001651132677401741, + "loss": 0.13241591453552246, + "mean_token_accuracy": 0.9536613565683365, + "num_tokens": 7056244.0, + "step": 2400 + }, + { + "entropy": 0.1707596355304122, + "epoch": 6.363872644574399, + "grad_norm": 0.3031822144985199, + "learning_rate": 0.00015548518156325097, + "loss": 0.13481686592102052, + "mean_token_accuracy": 0.9523171505331993, + "num_tokens": 7206950.0, + "step": 2450 + }, + { + "entropy": 0.1729374410584569, + "epoch": 6.493827160493828, + "grad_norm": 0.3582129180431366, + "learning_rate": 0.00014599145863196456, + "loss": 0.13048507690429687, + "mean_token_accuracy": 0.9524129882454873, + "num_tokens": 7359429.0, + "step": 2500 + }, + { + "entropy": 0.18609228238463402, + "epoch": 6.623781676413255, + "grad_norm": 0.4041096270084381, + "learning_rate": 0.00013665160616942816, + "loss": 0.1391014862060547, + "mean_token_accuracy": 0.9490628919005394, + "num_tokens": 7499281.0, + "step": 2550 + }, + { + "entropy": 0.18581669881939888, + "epoch": 6.753736192332683, + "grad_norm": 0.49577364325523376, + "learning_rate": 0.0001274848152334751, + "loss": 0.14069243431091308, + "mean_token_accuracy": 0.949679699242115, + "num_tokens": 7640758.0, + "step": 2600 + }, + { + "entropy": 0.1762319504469633, + "epoch": 6.883690708252112, + "grad_norm": 0.3717825412750244, + "learning_rate": 0.0001185099212838419, + "loss": 0.13402896881103515, + "mean_token_accuracy": 0.9521244546771049, + "num_tokens": 7791986.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.26854762040938324, + "eval_loss": 0.821937084197998, + "eval_mean_token_accuracy": 0.8360469177938424, + "eval_num_tokens": 7924980.0, + "eval_runtime": 53.2166, + "eval_samples_per_second": 31.137, + "eval_steps_per_second": 3.909, + "step": 2695 + }, + { + "entropy": 0.16884614399929143, + "epoch": 7.012995451591943, + "grad_norm": 0.1985609084367752, + "learning_rate": 0.00010974536548001442, + "loss": 0.1253983211517334, + "mean_token_accuracy": 0.9559567993010708, + "num_tokens": 7940521.0, + "step": 2700 + }, + { + "entropy": 0.11399174220860005, + "epoch": 7.142949967511371, + "grad_norm": 0.24280235171318054, + "learning_rate": 0.00010120915678926424, + "loss": 0.07573186874389648, + "mean_token_accuracy": 0.9740175333619118, + "num_tokens": 8084193.0, + "step": 2750 + }, + { + "entropy": 0.11322081722319126, + "epoch": 7.272904483430799, + "grad_norm": 0.3016868233680725, + "learning_rate": 9.291883498273002e-05, + "loss": 0.0749389362335205, + "mean_token_accuracy": 0.9736263358592987, + "num_tokens": 8230159.0, + "step": 2800 + }, + { + "entropy": 0.10975711965933442, + "epoch": 7.402858999350228, + "grad_norm": 0.2391500324010849, + "learning_rate": 8.489143459558115e-05, + "loss": 0.07450803756713867, + "mean_token_accuracy": 0.9741762459278107, + "num_tokens": 8380556.0, + "step": 2850 + }, + { + "entropy": 0.11804175904020667, + "epoch": 7.532813515269655, + "grad_norm": 0.24333180487155914, + "learning_rate": 7.714344992531333e-05, + "loss": 0.07613008975982666, + "mean_token_accuracy": 0.9720202484726905, + "num_tokens": 8523248.0, + "step": 2900 + }, + { + "entropy": 0.11418169999495148, + "epoch": 7.662768031189084, + "grad_norm": 0.26805493235588074, + "learning_rate": 6.969080114009809e-05, + "loss": 0.0742732048034668, + "mean_token_accuracy": 0.9728452184796333, + "num_tokens": 8672002.0, + "step": 2950 + }, + { + "entropy": 0.11600488040596246, + "epoch": 7.792722547108512, + "grad_norm": 0.30010196566581726, + "learning_rate": 6.254880156682401e-05, + "loss": 0.0739283561706543, + "mean_token_accuracy": 0.9734612467885018, + "num_tokens": 8816090.0, + "step": 3000 + }, + { + "entropy": 0.11802038468420506, + "epoch": 7.92267706302794, + "grad_norm": 0.25391119718551636, + "learning_rate": 5.5732126226045195e-05, + "loss": 0.07241737365722656, + "mean_token_accuracy": 0.9727071779966354, + "num_tokens": 8966004.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.21782970414138758, + "eval_loss": 0.9648962616920471, + "eval_mean_token_accuracy": 0.8366291666260133, + "eval_num_tokens": 9057120.0, + "eval_runtime": 53.1636, + "eval_samples_per_second": 31.168, + "eval_steps_per_second": 3.912, + "step": 3080 + }, + { + "entropy": 0.10789870936891541, + "epoch": 8.05198180636777, + "grad_norm": 0.13128913938999176, + "learning_rate": 4.925478167848853e-05, + "loss": 0.06333432197570801, + "mean_token_accuracy": 0.9768412313868652, + "num_tokens": 9115929.0, + "step": 3100 + }, + { + "entropy": 0.09126614544540644, + "epoch": 8.1819363222872, + "grad_norm": 0.17148423194885254, + "learning_rate": 4.3130077245079537e-05, + "loss": 0.05348764419555664, + "mean_token_accuracy": 0.9794670847058297, + "num_tokens": 9260646.0, + "step": 3150 + }, + { + "entropy": 0.0940462826564908, + "epoch": 8.311890838206628, + "grad_norm": 0.1125558540225029, + "learning_rate": 3.7370597659620595e-05, + "loss": 0.054701466560363766, + "mean_token_accuracy": 0.9788439679145813, + "num_tokens": 9404737.0, + "step": 3200 + }, + { + "entropy": 0.09131624042987824, + "epoch": 8.441845354126055, + "grad_norm": 0.11930789798498154, + "learning_rate": 3.198817721031492e-05, + "loss": 0.05378121376037598, + "mean_token_accuracy": 0.9795553028583527, + "num_tokens": 9552241.0, + "step": 3250 + }, + { + "entropy": 0.09954260623082518, + "epoch": 8.571799870045485, + "grad_norm": 0.11159078031778336, + "learning_rate": 2.6993875423267592e-05, + "loss": 0.058625984191894534, + "mean_token_accuracy": 0.9768018040060997, + "num_tokens": 9689407.0, + "step": 3300 + }, + { + "entropy": 0.0895724980160594, + "epoch": 8.701754385964913, + "grad_norm": 0.12595818936824799, + "learning_rate": 2.239795433792923e-05, + "loss": 0.05260286808013916, + "mean_token_accuracy": 0.9801814475655556, + "num_tokens": 9842204.0, + "step": 3350 + }, + { + "entropy": 0.0897881293296814, + "epoch": 8.83170890188434, + "grad_norm": 0.12930619716644287, + "learning_rate": 1.820985742117483e-05, + "loss": 0.05259728908538818, + "mean_token_accuracy": 0.9796033400297165, + "num_tokens": 9994524.0, + "step": 3400 + }, + { + "entropy": 0.08761547094210982, + "epoch": 8.961663417803768, + "grad_norm": 0.16654609143733978, + "learning_rate": 1.4438190163344354e-05, + "loss": 0.0525990629196167, + "mean_token_accuracy": 0.9798887211084366, + "num_tokens": 10147597.0, + "step": 3450 + }, + { + "epoch": 9.0, + "eval_entropy": 0.19581801902789336, + "eval_loss": 1.0490690469741821, + "eval_mean_token_accuracy": 0.8374674781583823, + "eval_num_tokens": 10189260.0, + "eval_runtime": 53.1081, + "eval_samples_per_second": 31.201, + "eval_steps_per_second": 3.917, + "step": 3465 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.67157400856064e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..be864e6da7ad2d985a45bf5bc17b2ac706d25e1a --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-385/trainer_state.json @@ -0,0 +1,115 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 385, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.17496881987584e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d768897029f822629540ea63da2c038fc87cf18 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-3850/trainer_state.json @@ -0,0 +1,914 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 3850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + }, + { + "entropy": 0.5432504566770103, + "epoch": 2.077972709551657, + "grad_norm": 0.4615200459957123, + "learning_rate": 0.0004263751343567108, + "loss": 0.5117238616943359, + "mean_token_accuracy": 0.8515748070113024, + "num_tokens": 2348248.0, + "step": 800 + }, + { + "entropy": 0.5108437672257423, + "epoch": 2.207927225471085, + "grad_norm": 0.39224034547805786, + "learning_rate": 0.0004224944149877526, + "loss": 0.47819324493408205, + "mean_token_accuracy": 0.8583902576565743, + "num_tokens": 2501427.0, + "step": 850 + }, + { + "entropy": 0.5110109977424144, + "epoch": 2.3378817413905133, + "grad_norm": 0.36829307675361633, + "learning_rate": 0.00041819942175679877, + "loss": 0.4832262420654297, + "mean_token_accuracy": 0.8568637049198151, + "num_tokens": 2650794.0, + "step": 900 + }, + { + "entropy": 0.5188203908503055, + "epoch": 2.4678362573099415, + "grad_norm": 0.47326067090034485, + "learning_rate": 0.00041349897979934495, + "loss": 0.48348499298095704, + "mean_token_accuracy": 0.8567950987815857, + "num_tokens": 2798180.0, + "step": 950 + }, + { + "entropy": 0.5181271532177925, + "epoch": 2.5977907732293697, + "grad_norm": 0.2972731292247772, + "learning_rate": 0.0004084027473464832, + "loss": 0.4918654251098633, + "mean_token_accuracy": 0.8539192593097686, + "num_tokens": 2950301.0, + "step": 1000 + }, + { + "entropy": 0.5172442196309567, + "epoch": 2.727745289148798, + "grad_norm": 0.4516422152519226, + "learning_rate": 0.0004029211958796559, + "loss": 0.4767612457275391, + "mean_token_accuracy": 0.8575084239244462, + "num_tokens": 3091810.0, + "step": 1050 + }, + { + "entropy": 0.5078031922131777, + "epoch": 2.857699805068226, + "grad_norm": 0.28876274824142456, + "learning_rate": 0.00039706558861438384, + "loss": 0.47852867126464843, + "mean_token_accuracy": 0.8580548599362373, + "num_tokens": 3242184.0, + "step": 1100 + }, + { + "entropy": 0.5267327691614628, + "epoch": 2.9876543209876543, + "grad_norm": 0.29029977321624756, + "learning_rate": 0.000390847957357179, + "loss": 0.48259868621826174, + "mean_token_accuracy": 0.8568289718031883, + "num_tokens": 3382605.0, + "step": 1150 + }, + { + "epoch": 3.0, + "eval_entropy": 0.5411632635559027, + "eval_loss": 0.5956819653511047, + "eval_mean_token_accuracy": 0.8327134661376476, + "eval_num_tokens": 3396420.0, + "eval_runtime": 53.1279, + "eval_samples_per_second": 31.189, + "eval_steps_per_second": 3.915, + "step": 1155 + }, + { + "entropy": 0.44545789564674226, + "epoch": 3.116959064327485, + "grad_norm": 0.3036324381828308, + "learning_rate": 0.00038428107778319486, + "loss": 0.4016357421875, + "mean_token_accuracy": 0.8744772864346528, + "num_tokens": 3522582.0, + "step": 1200 + }, + { + "entropy": 0.4417941153049469, + "epoch": 3.246913580246914, + "grad_norm": 0.3334718942642212, + "learning_rate": 0.0003773784431854142, + "loss": 0.3976226806640625, + "mean_token_accuracy": 0.8750237095355987, + "num_tokens": 3670864.0, + "step": 1250 + }, + { + "entropy": 0.4398873296380043, + "epoch": 3.3768680961663415, + "grad_norm": 0.2897355258464813, + "learning_rate": 0.0003701542367493103, + "loss": 0.40165950775146486, + "mean_token_accuracy": 0.874879752099514, + "num_tokens": 3822021.0, + "step": 1300 + }, + { + "entropy": 0.4448629415035248, + "epoch": 3.50682261208577, + "grad_norm": 0.2915370464324951, + "learning_rate": 0.00036262330240995327, + "loss": 0.40887042999267575, + "mean_token_accuracy": 0.8728678345680236, + "num_tokens": 3968736.0, + "step": 1350 + }, + { + "entropy": 0.46088827416300776, + "epoch": 3.636777128005198, + "grad_norm": 0.3924349546432495, + "learning_rate": 0.0003548011143514403, + "loss": 0.4195223617553711, + "mean_token_accuracy": 0.8715560722351074, + "num_tokens": 4113509.0, + "step": 1400 + }, + { + "entropy": 0.4382705672085285, + "epoch": 3.7667316439246266, + "grad_norm": 0.27308768033981323, + "learning_rate": 0.0003467037452113232, + "loss": 0.39946125030517576, + "mean_token_accuracy": 0.8747073370218277, + "num_tokens": 4267958.0, + "step": 1450 + }, + { + "entropy": 0.45163255512714384, + "epoch": 3.8966861598440543, + "grad_norm": 0.34591901302337646, + "learning_rate": 0.00033834783305536283, + "loss": 0.4112929534912109, + "mean_token_accuracy": 0.8722118473052979, + "num_tokens": 4415398.0, + "step": 1500 + }, + { + "epoch": 4.0, + "eval_entropy": 0.49164803326129913, + "eval_loss": 0.602810800075531, + "eval_mean_token_accuracy": 0.8353393092178382, + "eval_num_tokens": 4528560.0, + "eval_runtime": 53.117, + "eval_samples_per_second": 31.195, + "eval_steps_per_second": 3.916, + "step": 1540 + }, + { + "entropy": 0.4349768550401956, + "epoch": 4.025990903183885, + "grad_norm": 0.38304343819618225, + "learning_rate": 0.0003297505471904706, + "loss": 0.38982112884521486, + "mean_token_accuracy": 0.8773354097227355, + "num_tokens": 4559534.0, + "step": 1550 + }, + { + "entropy": 0.35551133938133717, + "epoch": 4.155945419103314, + "grad_norm": 0.36798641085624695, + "learning_rate": 0.00032092955288608274, + "loss": 0.30814620971679685, + "mean_token_accuracy": 0.8983125445246697, + "num_tokens": 4707075.0, + "step": 1600 + }, + { + "entropy": 0.3554606523364782, + "epoch": 4.2858999350227425, + "grad_norm": 0.35416939854621887, + "learning_rate": 0.0003119029750764555, + "loss": 0.3161496162414551, + "mean_token_accuracy": 0.8964688742160797, + "num_tokens": 4851683.0, + "step": 1650 + }, + { + "entropy": 0.3676067052781582, + "epoch": 4.41585445094217, + "grad_norm": 0.2962980568408966, + "learning_rate": 0.00030268936111846394, + "loss": 0.3259954071044922, + "mean_token_accuracy": 0.8930631306767464, + "num_tokens": 4994086.0, + "step": 1700 + }, + { + "entropy": 0.37352351650595667, + "epoch": 4.545808966861598, + "grad_norm": 0.3697018325328827, + "learning_rate": 0.0002933076426814288, + "loss": 0.33234176635742185, + "mean_token_accuracy": 0.8911465045809746, + "num_tokens": 5137171.0, + "step": 1750 + }, + { + "entropy": 0.36930313020944594, + "epoch": 4.675763482781027, + "grad_norm": 0.3185271620750427, + "learning_rate": 0.00028377709684727665, + "loss": 0.3297584533691406, + "mean_token_accuracy": 0.8921913403272629, + "num_tokens": 5287076.0, + "step": 1800 + }, + { + "entropy": 0.3608156970143318, + "epoch": 4.805717998700455, + "grad_norm": 0.411017507314682, + "learning_rate": 0.0002741173065009647, + "loss": 0.3253137969970703, + "mean_token_accuracy": 0.8944574344158173, + "num_tokens": 5436923.0, + "step": 1850 + }, + { + "entropy": 0.3674951885640621, + "epoch": 4.935672514619883, + "grad_norm": 0.3353460431098938, + "learning_rate": 0.0002643481200925555, + "loss": 0.3277518081665039, + "mean_token_accuracy": 0.8909049332141876, + "num_tokens": 5589195.0, + "step": 1900 + }, + { + "epoch": 5.0, + "eval_entropy": 0.4197400872810529, + "eval_loss": 0.618202269077301, + "eval_mean_token_accuracy": 0.8406766191698037, + "eval_num_tokens": 5660700.0, + "eval_runtime": 53.1536, + "eval_samples_per_second": 31.174, + "eval_steps_per_second": 3.913, + "step": 1925 + }, + { + "entropy": 0.30353270531000204, + "epoch": 5.064977257959714, + "grad_norm": 0.32264357805252075, + "learning_rate": 0.00025448961085362376, + "loss": 0.2661189651489258, + "mean_token_accuracy": 0.9130437371119782, + "num_tokens": 5739445.0, + "step": 1950 + }, + { + "entropy": 0.251540619507432, + "epoch": 5.1949317738791425, + "grad_norm": 0.3971196711063385, + "learning_rate": 0.0002445620355517912, + "loss": 0.215250244140625, + "mean_token_accuracy": 0.9263233968615532, + "num_tokens": 5888043.0, + "step": 2000 + }, + { + "entropy": 0.2709880671650171, + "epoch": 5.32488628979857, + "grad_norm": 0.3659353256225586, + "learning_rate": 0.00023458579286814304, + "loss": 0.22618688583374025, + "mean_token_accuracy": 0.9207352563738823, + "num_tokens": 6031284.0, + "step": 2050 + }, + { + "entropy": 0.27484370954334736, + "epoch": 5.454840805717999, + "grad_norm": 0.4071715176105499, + "learning_rate": 0.00022458138148304628, + "loss": 0.23481584548950196, + "mean_token_accuracy": 0.9185729047656059, + "num_tokens": 6179927.0, + "step": 2100 + }, + { + "entropy": 0.26769588187336923, + "epoch": 5.584795321637427, + "grad_norm": 0.3493686318397522, + "learning_rate": 0.00021456935795649494, + "loss": 0.23087779998779298, + "mean_token_accuracy": 0.9200295981764793, + "num_tokens": 6322630.0, + "step": 2150 + }, + { + "entropy": 0.26875538341701033, + "epoch": 5.714749837556855, + "grad_norm": 0.3845248520374298, + "learning_rate": 0.0002045702944895277, + "loss": 0.23075325012207032, + "mean_token_accuracy": 0.9196864122152328, + "num_tokens": 6469843.0, + "step": 2200 + }, + { + "entropy": 0.2734881558269262, + "epoch": 5.844704353476283, + "grad_norm": 0.3585197329521179, + "learning_rate": 0.00019460473665350612, + "loss": 0.23366186141967774, + "mean_token_accuracy": 0.9188175854086876, + "num_tokens": 6617768.0, + "step": 2250 + }, + { + "entropy": 0.27908188320696353, + "epoch": 5.974658869395712, + "grad_norm": 0.3747495114803314, + "learning_rate": 0.00018469316117411113, + "loss": 0.23512376785278322, + "mean_token_accuracy": 0.9161470046639443, + "num_tokens": 6761187.0, + "step": 2300 + }, + { + "epoch": 6.0, + "eval_entropy": 0.3402500281540247, + "eval_loss": 0.7022264003753662, + "eval_mean_token_accuracy": 0.8379392472024147, + "eval_num_tokens": 6792840.0, + "eval_runtime": 53.1771, + "eval_samples_per_second": 31.16, + "eval_steps_per_second": 3.911, + "step": 2310 + }, + { + "entropy": 0.19567224593018767, + "epoch": 6.1039636127355426, + "grad_norm": 0.34765154123306274, + "learning_rate": 0.0001748559338567996, + "loss": 0.14980849266052246, + "mean_token_accuracy": 0.9468569485985454, + "num_tokens": 6909578.0, + "step": 2350 + }, + { + "entropy": 0.17214293472468853, + "epoch": 6.23391812865497, + "grad_norm": 0.2610681354999542, + "learning_rate": 0.0001651132677401741, + "loss": 0.13241591453552246, + "mean_token_accuracy": 0.9536613565683365, + "num_tokens": 7056244.0, + "step": 2400 + }, + { + "entropy": 0.1707596355304122, + "epoch": 6.363872644574399, + "grad_norm": 0.3031822144985199, + "learning_rate": 0.00015548518156325097, + "loss": 0.13481686592102052, + "mean_token_accuracy": 0.9523171505331993, + "num_tokens": 7206950.0, + "step": 2450 + }, + { + "entropy": 0.1729374410584569, + "epoch": 6.493827160493828, + "grad_norm": 0.3582129180431366, + "learning_rate": 0.00014599145863196456, + "loss": 0.13048507690429687, + "mean_token_accuracy": 0.9524129882454873, + "num_tokens": 7359429.0, + "step": 2500 + }, + { + "entropy": 0.18609228238463402, + "epoch": 6.623781676413255, + "grad_norm": 0.4041096270084381, + "learning_rate": 0.00013665160616942816, + "loss": 0.1391014862060547, + "mean_token_accuracy": 0.9490628919005394, + "num_tokens": 7499281.0, + "step": 2550 + }, + { + "entropy": 0.18581669881939888, + "epoch": 6.753736192332683, + "grad_norm": 0.49577364325523376, + "learning_rate": 0.0001274848152334751, + "loss": 0.14069243431091308, + "mean_token_accuracy": 0.949679699242115, + "num_tokens": 7640758.0, + "step": 2600 + }, + { + "entropy": 0.1762319504469633, + "epoch": 6.883690708252112, + "grad_norm": 0.3717825412750244, + "learning_rate": 0.0001185099212838419, + "loss": 0.13402896881103515, + "mean_token_accuracy": 0.9521244546771049, + "num_tokens": 7791986.0, + "step": 2650 + }, + { + "epoch": 7.0, + "eval_entropy": 0.26854762040938324, + "eval_loss": 0.821937084197998, + "eval_mean_token_accuracy": 0.8360469177938424, + "eval_num_tokens": 7924980.0, + "eval_runtime": 53.2166, + "eval_samples_per_second": 31.137, + "eval_steps_per_second": 3.909, + "step": 2695 + }, + { + "entropy": 0.16884614399929143, + "epoch": 7.012995451591943, + "grad_norm": 0.1985609084367752, + "learning_rate": 0.00010974536548001442, + "loss": 0.1253983211517334, + "mean_token_accuracy": 0.9559567993010708, + "num_tokens": 7940521.0, + "step": 2700 + }, + { + "entropy": 0.11399174220860005, + "epoch": 7.142949967511371, + "grad_norm": 0.24280235171318054, + "learning_rate": 0.00010120915678926424, + "loss": 0.07573186874389648, + "mean_token_accuracy": 0.9740175333619118, + "num_tokens": 8084193.0, + "step": 2750 + }, + { + "entropy": 0.11322081722319126, + "epoch": 7.272904483430799, + "grad_norm": 0.3016868233680725, + "learning_rate": 9.291883498273002e-05, + "loss": 0.0749389362335205, + "mean_token_accuracy": 0.9736263358592987, + "num_tokens": 8230159.0, + "step": 2800 + }, + { + "entropy": 0.10975711965933442, + "epoch": 7.402858999350228, + "grad_norm": 0.2391500324010849, + "learning_rate": 8.489143459558115e-05, + "loss": 0.07450803756713867, + "mean_token_accuracy": 0.9741762459278107, + "num_tokens": 8380556.0, + "step": 2850 + }, + { + "entropy": 0.11804175904020667, + "epoch": 7.532813515269655, + "grad_norm": 0.24333180487155914, + "learning_rate": 7.714344992531333e-05, + "loss": 0.07613008975982666, + "mean_token_accuracy": 0.9720202484726905, + "num_tokens": 8523248.0, + "step": 2900 + }, + { + "entropy": 0.11418169999495148, + "epoch": 7.662768031189084, + "grad_norm": 0.26805493235588074, + "learning_rate": 6.969080114009809e-05, + "loss": 0.0742732048034668, + "mean_token_accuracy": 0.9728452184796333, + "num_tokens": 8672002.0, + "step": 2950 + }, + { + "entropy": 0.11600488040596246, + "epoch": 7.792722547108512, + "grad_norm": 0.30010196566581726, + "learning_rate": 6.254880156682401e-05, + "loss": 0.0739283561706543, + "mean_token_accuracy": 0.9734612467885018, + "num_tokens": 8816090.0, + "step": 3000 + }, + { + "entropy": 0.11802038468420506, + "epoch": 7.92267706302794, + "grad_norm": 0.25391119718551636, + "learning_rate": 5.5732126226045195e-05, + "loss": 0.07241737365722656, + "mean_token_accuracy": 0.9727071779966354, + "num_tokens": 8966004.0, + "step": 3050 + }, + { + "epoch": 8.0, + "eval_entropy": 0.21782970414138758, + "eval_loss": 0.9648962616920471, + "eval_mean_token_accuracy": 0.8366291666260133, + "eval_num_tokens": 9057120.0, + "eval_runtime": 53.1636, + "eval_samples_per_second": 31.168, + "eval_steps_per_second": 3.912, + "step": 3080 + }, + { + "entropy": 0.10789870936891541, + "epoch": 8.05198180636777, + "grad_norm": 0.13128913938999176, + "learning_rate": 4.925478167848853e-05, + "loss": 0.06333432197570801, + "mean_token_accuracy": 0.9768412313868652, + "num_tokens": 9115929.0, + "step": 3100 + }, + { + "entropy": 0.09126614544540644, + "epoch": 8.1819363222872, + "grad_norm": 0.17148423194885254, + "learning_rate": 4.3130077245079537e-05, + "loss": 0.05348764419555664, + "mean_token_accuracy": 0.9794670847058297, + "num_tokens": 9260646.0, + "step": 3150 + }, + { + "entropy": 0.0940462826564908, + "epoch": 8.311890838206628, + "grad_norm": 0.1125558540225029, + "learning_rate": 3.7370597659620595e-05, + "loss": 0.054701466560363766, + "mean_token_accuracy": 0.9788439679145813, + "num_tokens": 9404737.0, + "step": 3200 + }, + { + "entropy": 0.09131624042987824, + "epoch": 8.441845354126055, + "grad_norm": 0.11930789798498154, + "learning_rate": 3.198817721031492e-05, + "loss": 0.05378121376037598, + "mean_token_accuracy": 0.9795553028583527, + "num_tokens": 9552241.0, + "step": 3250 + }, + { + "entropy": 0.09954260623082518, + "epoch": 8.571799870045485, + "grad_norm": 0.11159078031778336, + "learning_rate": 2.6993875423267592e-05, + "loss": 0.058625984191894534, + "mean_token_accuracy": 0.9768018040060997, + "num_tokens": 9689407.0, + "step": 3300 + }, + { + "entropy": 0.0895724980160594, + "epoch": 8.701754385964913, + "grad_norm": 0.12595818936824799, + "learning_rate": 2.239795433792923e-05, + "loss": 0.05260286808013916, + "mean_token_accuracy": 0.9801814475655556, + "num_tokens": 9842204.0, + "step": 3350 + }, + { + "entropy": 0.0897881293296814, + "epoch": 8.83170890188434, + "grad_norm": 0.12930619716644287, + "learning_rate": 1.820985742117483e-05, + "loss": 0.05259728908538818, + "mean_token_accuracy": 0.9796033400297165, + "num_tokens": 9994524.0, + "step": 3400 + }, + { + "entropy": 0.08761547094210982, + "epoch": 8.961663417803768, + "grad_norm": 0.16654609143733978, + "learning_rate": 1.4438190163344354e-05, + "loss": 0.0525990629196167, + "mean_token_accuracy": 0.9798887211084366, + "num_tokens": 10147597.0, + "step": 3450 + }, + { + "epoch": 9.0, + "eval_entropy": 0.19581801902789336, + "eval_loss": 1.0490690469741821, + "eval_mean_token_accuracy": 0.8374674781583823, + "eval_num_tokens": 10189260.0, + "eval_runtime": 53.1081, + "eval_samples_per_second": 31.201, + "eval_steps_per_second": 3.917, + "step": 3465 + }, + { + "entropy": 0.08281858840105522, + "epoch": 9.0909681611436, + "grad_norm": 0.07840815931558609, + "learning_rate": 1.1090702396114882e-05, + "loss": 0.049499006271362306, + "mean_token_accuracy": 0.9826878234369671, + "num_tokens": 10294278.0, + "step": 3500 + }, + { + "entropy": 0.08089617094956339, + "epoch": 9.220922677063028, + "grad_norm": 0.07133486866950989, + "learning_rate": 8.174272368537607e-06, + "loss": 0.046767563819885255, + "mean_token_accuracy": 0.9814594948291778, + "num_tokens": 10446433.0, + "step": 3550 + }, + { + "entropy": 0.08559382131323218, + "epoch": 9.350877192982455, + "grad_norm": 0.12356307357549667, + "learning_rate": 5.694892613958283e-06, + "loss": 0.04791583061218262, + "mean_token_accuracy": 0.9801762121915817, + "num_tokens": 10596480.0, + "step": 3600 + }, + { + "entropy": 0.08924649698659777, + "epoch": 9.480831708901885, + "grad_norm": 0.1030779778957367, + "learning_rate": 3.657657636862211e-06, + "loss": 0.049855589866638184, + "mean_token_accuracy": 0.9797253888845444, + "num_tokens": 10740528.0, + "step": 3650 + }, + { + "entropy": 0.08608358707278967, + "epoch": 9.610786224821313, + "grad_norm": 0.1303764432668686, + "learning_rate": 2.0667534449430365e-06, + "loss": 0.051627678871154783, + "mean_token_accuracy": 0.9801352617144584, + "num_tokens": 10881750.0, + "step": 3700 + }, + { + "entropy": 0.08337799687869847, + "epoch": 9.74074074074074, + "grad_norm": 0.10697782784700394, + "learning_rate": 9.254489479054157e-07, + "loss": 0.04981210708618164, + "mean_token_accuracy": 0.9807726776599884, + "num_tokens": 11027552.0, + "step": 3750 + }, + { + "entropy": 0.08381836652755738, + "epoch": 9.870695256660168, + "grad_norm": 0.10532698035240173, + "learning_rate": 2.3608924067417744e-07, + "loss": 0.050357179641723634, + "mean_token_accuracy": 0.9809106415510178, + "num_tokens": 11172587.0, + "step": 3800 + }, + { + "entropy": 0.08511934059686098, + "epoch": 10.0, + "grad_norm": 0.11242295801639557, + "learning_rate": 9.078481152009603e-11, + "loss": 0.048008084297180176, + "mean_token_accuracy": 0.9807723943312564, + "num_tokens": 11321400.0, + "step": 3850 + }, + { + "epoch": 10.0, + "eval_entropy": 0.18645986377333218, + "eval_loss": 1.0940455198287964, + "eval_mean_token_accuracy": 0.8376890592850171, + "eval_num_tokens": 11321400.0, + "eval_runtime": 53.1357, + "eval_samples_per_second": 31.184, + "eval_steps_per_second": 3.915, + "step": 3850 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.1889703285185536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/README.md b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/adapter_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3124ee21be605cc27948b88a54faff9a6b1988 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.04123083959940288, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "up_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/chat_template.jinja b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/tokenizer_config.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/trainer_state.json b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..736ba0d1ecf32df042d70bd76379b406d9d9ef9a --- /dev/null +++ b/DBCA_code_Estonian/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test2/checkpoint-770/trainer_state.json @@ -0,0 +1,206 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 770, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.856709834933281, + "epoch": 0.1299545159194282, + "grad_norm": 0.8576324582099915, + "learning_rate": 5.622316634653647e-05, + "loss": 1.7368515014648438, + "mean_token_accuracy": 0.6531239575147629, + "num_tokens": 154518.0, + "step": 50 + }, + { + "entropy": 0.7963882395625115, + "epoch": 0.2599090318388564, + "grad_norm": 0.478260338306427, + "learning_rate": 0.00011359374425116552, + "loss": 0.7638471984863281, + "mean_token_accuracy": 0.794710833132267, + "num_tokens": 306733.0, + "step": 100 + }, + { + "entropy": 0.72038290143013, + "epoch": 0.3898635477582846, + "grad_norm": 0.44744813442230225, + "learning_rate": 0.0001709643221557946, + "loss": 0.6789447021484375, + "mean_token_accuracy": 0.8134408834576606, + "num_tokens": 446267.0, + "step": 150 + }, + { + "entropy": 0.6589414384961129, + "epoch": 0.5198180636777128, + "grad_norm": 0.3264491856098175, + "learning_rate": 0.0002283349000604236, + "loss": 0.6165869903564453, + "mean_token_accuracy": 0.8283544909954071, + "num_tokens": 600256.0, + "step": 200 + }, + { + "entropy": 0.6685489591956139, + "epoch": 0.649772579597141, + "grad_norm": 0.46411946415901184, + "learning_rate": 0.0002857054779650527, + "loss": 0.6179034805297852, + "mean_token_accuracy": 0.8271848052740097, + "num_tokens": 738649.0, + "step": 250 + }, + { + "entropy": 0.6590392506122589, + "epoch": 0.7797270955165692, + "grad_norm": 0.4984032213687897, + "learning_rate": 0.00034307605586968176, + "loss": 0.6105824661254883, + "mean_token_accuracy": 0.8298770362138748, + "num_tokens": 883494.0, + "step": 300 + }, + { + "entropy": 0.6322308082878589, + "epoch": 0.9096816114359974, + "grad_norm": 0.35582953691482544, + "learning_rate": 0.0004004466337743108, + "loss": 0.5987993621826172, + "mean_token_accuracy": 0.8332566061615944, + "num_tokens": 1032111.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6668267184152052, + "eval_loss": 0.6812539100646973, + "eval_mean_token_accuracy": 0.8146296894321075, + "eval_num_tokens": 1132140.0, + "eval_runtime": 53.08, + "eval_samples_per_second": 31.217, + "eval_steps_per_second": 3.919, + "step": 385 + }, + { + "entropy": 0.6360254319169414, + "epoch": 1.0389863547758285, + "grad_norm": 0.537056028842926, + "learning_rate": 0.0004417356562802791, + "loss": 0.5937010192871094, + "mean_token_accuracy": 0.8332705884123567, + "num_tokens": 1177556.0, + "step": 400 + }, + { + "entropy": 0.5945304277539253, + "epoch": 1.1689408706952567, + "grad_norm": 0.4016346335411072, + "learning_rate": 0.00044138169957914594, + "loss": 0.564481430053711, + "mean_token_accuracy": 0.8422813892364502, + "num_tokens": 1325434.0, + "step": 450 + }, + { + "entropy": 0.5895386649668217, + "epoch": 1.2988953866146848, + "grad_norm": 0.32074230909347534, + "learning_rate": 0.00044057466037611984, + "loss": 0.5511369705200195, + "mean_token_accuracy": 0.8431353771686554, + "num_tokens": 1474116.0, + "step": 500 + }, + { + "entropy": 0.5951217761635781, + "epoch": 1.428849902534113, + "grad_norm": 0.3066493272781372, + "learning_rate": 0.00043931619693465644, + "loss": 0.5558741760253906, + "mean_token_accuracy": 0.84255860298872, + "num_tokens": 1621193.0, + "step": 550 + }, + { + "entropy": 0.5773422825336456, + "epoch": 1.5588044184535412, + "grad_norm": 0.38174429535865784, + "learning_rate": 0.00043760889508197114, + "loss": 0.5448895263671875, + "mean_token_accuracy": 0.8427786123752594, + "num_tokens": 1767789.0, + "step": 600 + }, + { + "entropy": 0.568668949753046, + "epoch": 1.6887589343729694, + "grad_norm": 0.4095861613750458, + "learning_rate": 0.0004354562628958121, + "loss": 0.5313994598388672, + "mean_token_accuracy": 0.847584228515625, + "num_tokens": 1918138.0, + "step": 650 + }, + { + "entropy": 0.5952580836415291, + "epoch": 1.8187134502923976, + "grad_norm": 0.34596189856529236, + "learning_rate": 0.0004328627234962387, + "loss": 0.5546007919311523, + "mean_token_accuracy": 0.8419265493750572, + "num_tokens": 2057575.0, + "step": 700 + }, + { + "entropy": 0.5800049532949925, + "epoch": 1.9486679662118258, + "grad_norm": 0.3752591609954834, + "learning_rate": 0.0004298336059572178, + "loss": 0.5452734756469727, + "mean_token_accuracy": 0.8436958035826683, + "num_tokens": 2208320.0, + "step": 750 + }, + { + "epoch": 2.0, + "eval_entropy": 0.6070062083980212, + "eval_loss": 0.631233274936676, + "eval_mean_token_accuracy": 0.8255004269572405, + "eval_num_tokens": 2264280.0, + "eval_runtime": 53.1092, + "eval_samples_per_second": 31.2, + "eval_steps_per_second": 3.916, + "step": 770 + } + ], + "logging_steps": 50, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0397744360926618e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..192068a8d4c4c99bb02e20dafa50b331717785af --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1122/trainer_state.json @@ -0,0 +1,287 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1122, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.278654626405806e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ada58d2b7c1f2a14a54e2d17f90a2683f3e2c59 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1496/trainer_state.json @@ -0,0 +1,368 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1496, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7048311292497e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..660242782facc2f3d664303427ffc20121c13d33 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-1870/trainer_state.json @@ -0,0 +1,459 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1870, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + }, + { + "entropy": 0.39569308842071377, + "epoch": 4.010709504685408, + "grad_norm": 0.40932103991508484, + "learning_rate": 0.0002248447601627953, + "loss": 0.3418621063232422, + "mean_token_accuracy": 0.8925740992782092, + "num_tokens": 3849380.0, + "step": 1500 + }, + { + "entropy": 0.29304657004773615, + "epoch": 4.144578313253012, + "grad_norm": 0.5317501425743103, + "learning_rate": 0.00021868805189738066, + "loss": 0.2386075210571289, + "mean_token_accuracy": 0.9206075271964074, + "num_tokens": 3976694.0, + "step": 1550 + }, + { + "entropy": 0.2991082117706537, + "epoch": 4.278447121820616, + "grad_norm": 0.5011683106422424, + "learning_rate": 0.00021238208772105958, + "loss": 0.2479239273071289, + "mean_token_accuracy": 0.9165873003005981, + "num_tokens": 4110204.0, + "step": 1600 + }, + { + "entropy": 0.2965650236606598, + "epoch": 4.412315930388219, + "grad_norm": 0.45324602723121643, + "learning_rate": 0.00020594059806617674, + "loss": 0.24801044464111327, + "mean_token_accuracy": 0.9172127342224121, + "num_tokens": 4240883.0, + "step": 1650 + }, + { + "entropy": 0.30350560761988166, + "epoch": 4.546184738955823, + "grad_norm": 0.4740602970123291, + "learning_rate": 0.00019937760845451308, + "loss": 0.2523613929748535, + "mean_token_accuracy": 0.9163035461306572, + "num_tokens": 4365381.0, + "step": 1700 + }, + { + "entropy": 0.3076958200335503, + "epoch": 4.680053547523427, + "grad_norm": 0.4616807699203491, + "learning_rate": 0.00019270740895850172, + "loss": 0.25745229721069335, + "mean_token_accuracy": 0.9147638303041458, + "num_tokens": 4490711.0, + "step": 1750 + }, + { + "entropy": 0.3103550442308187, + "epoch": 4.813922356091031, + "grad_norm": 0.5542571544647217, + "learning_rate": 0.00018594452308641905, + "loss": 0.2601847457885742, + "mean_token_accuracy": 0.9133006593585015, + "num_tokens": 4618532.0, + "step": 1800 + }, + { + "entropy": 0.30610986322164535, + "epoch": 4.947791164658635, + "grad_norm": 0.43830257654190063, + "learning_rate": 0.00017910367615929835, + "loss": 0.25211355209350583, + "mean_token_accuracy": 0.9155091819167137, + "num_tokens": 4752261.0, + "step": 1850 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3738320705294609, + "eval_loss": 0.6202279925346375, + "eval_mean_token_accuracy": 0.8485285672545433, + "eval_num_tokens": 4798715.0, + "eval_runtime": 52.1176, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.837, + "step": 1870 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.1290172055016755e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a624a7de82f7cca9a5e4b426f89b9843517f8cf7 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2244/trainer_state.json @@ -0,0 +1,540 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 2244, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + }, + { + "entropy": 0.39569308842071377, + "epoch": 4.010709504685408, + "grad_norm": 0.40932103991508484, + "learning_rate": 0.0002248447601627953, + "loss": 0.3418621063232422, + "mean_token_accuracy": 0.8925740992782092, + "num_tokens": 3849380.0, + "step": 1500 + }, + { + "entropy": 0.29304657004773615, + "epoch": 4.144578313253012, + "grad_norm": 0.5317501425743103, + "learning_rate": 0.00021868805189738066, + "loss": 0.2386075210571289, + "mean_token_accuracy": 0.9206075271964074, + "num_tokens": 3976694.0, + "step": 1550 + }, + { + "entropy": 0.2991082117706537, + "epoch": 4.278447121820616, + "grad_norm": 0.5011683106422424, + "learning_rate": 0.00021238208772105958, + "loss": 0.2479239273071289, + "mean_token_accuracy": 0.9165873003005981, + "num_tokens": 4110204.0, + "step": 1600 + }, + { + "entropy": 0.2965650236606598, + "epoch": 4.412315930388219, + "grad_norm": 0.45324602723121643, + "learning_rate": 0.00020594059806617674, + "loss": 0.24801044464111327, + "mean_token_accuracy": 0.9172127342224121, + "num_tokens": 4240883.0, + "step": 1650 + }, + { + "entropy": 0.30350560761988166, + "epoch": 4.546184738955823, + "grad_norm": 0.4740602970123291, + "learning_rate": 0.00019937760845451308, + "loss": 0.2523613929748535, + "mean_token_accuracy": 0.9163035461306572, + "num_tokens": 4365381.0, + "step": 1700 + }, + { + "entropy": 0.3076958200335503, + "epoch": 4.680053547523427, + "grad_norm": 0.4616807699203491, + "learning_rate": 0.00019270740895850172, + "loss": 0.25745229721069335, + "mean_token_accuracy": 0.9147638303041458, + "num_tokens": 4490711.0, + "step": 1750 + }, + { + "entropy": 0.3103550442308187, + "epoch": 4.813922356091031, + "grad_norm": 0.5542571544647217, + "learning_rate": 0.00018594452308641905, + "loss": 0.2601847457885742, + "mean_token_accuracy": 0.9133006593585015, + "num_tokens": 4618532.0, + "step": 1800 + }, + { + "entropy": 0.30610986322164535, + "epoch": 4.947791164658635, + "grad_norm": 0.43830257654190063, + "learning_rate": 0.00017910367615929835, + "loss": 0.25211355209350583, + "mean_token_accuracy": 0.9155091819167137, + "num_tokens": 4752261.0, + "step": 1850 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3738320705294609, + "eval_loss": 0.6202279925346375, + "eval_mean_token_accuracy": 0.8485285672545433, + "eval_num_tokens": 4798715.0, + "eval_runtime": 52.1176, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.837, + "step": 1870 + }, + { + "entropy": 0.24846129032849062, + "epoch": 5.080321285140562, + "grad_norm": 0.3632349371910095, + "learning_rate": 0.00017219976324842225, + "loss": 0.1933494758605957, + "mean_token_accuracy": 0.935125366906927, + "num_tokens": 4879271.0, + "step": 1900 + }, + { + "entropy": 0.211940533593297, + "epoch": 5.214190093708166, + "grad_norm": 0.5480105876922607, + "learning_rate": 0.00016524781674320582, + "loss": 0.15619863510131837, + "mean_token_accuracy": 0.9460694769024849, + "num_tokens": 5005076.0, + "step": 1950 + }, + { + "entropy": 0.214038780964911, + "epoch": 5.34805890227577, + "grad_norm": 0.5447636842727661, + "learning_rate": 0.00015826297362008662, + "loss": 0.15696516990661621, + "mean_token_accuracy": 0.9461162313818932, + "num_tokens": 5133591.0, + "step": 2000 + }, + { + "entropy": 0.21759917587041855, + "epoch": 5.481927710843373, + "grad_norm": 0.48160865902900696, + "learning_rate": 0.00015126044248368997, + "loss": 0.162998046875, + "mean_token_accuracy": 0.9450593250989914, + "num_tokens": 5257252.0, + "step": 2050 + }, + { + "entropy": 0.21842033743858338, + "epoch": 5.615796519410977, + "grad_norm": 0.5399336218833923, + "learning_rate": 0.00014425547045203324, + "loss": 0.1642344856262207, + "mean_token_accuracy": 0.9428097534179688, + "num_tokens": 5388652.0, + "step": 2100 + }, + { + "entropy": 0.21970326244831084, + "epoch": 5.749665327978581, + "grad_norm": 0.6201577186584473, + "learning_rate": 0.00013726330995787156, + "loss": 0.16450761795043944, + "mean_token_accuracy": 0.9430688858032227, + "num_tokens": 5515488.0, + "step": 2150 + }, + { + "entropy": 0.2100747512280941, + "epoch": 5.883534136546185, + "grad_norm": 0.5263391137123108, + "learning_rate": 0.00013029918553847215, + "loss": 0.15882587432861328, + "mean_token_accuracy": 0.9454078593850136, + "num_tokens": 5644323.0, + "step": 2200 + }, + { + "epoch": 6.0, + "eval_entropy": 0.30004522860050203, + "eval_loss": 0.710449755191803, + "eval_mean_token_accuracy": 0.8465994608402252, + "eval_num_tokens": 5758458.0, + "eval_runtime": 52.1136, + "eval_samples_per_second": 30.683, + "eval_steps_per_second": 3.838, + "step": 2244 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.551546113050757e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86170877f70df7e6497e3fbfb5869bd2cab310ad --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2618/trainer_state.json @@ -0,0 +1,631 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 2618, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + }, + { + "entropy": 0.39569308842071377, + "epoch": 4.010709504685408, + "grad_norm": 0.40932103991508484, + "learning_rate": 0.0002248447601627953, + "loss": 0.3418621063232422, + "mean_token_accuracy": 0.8925740992782092, + "num_tokens": 3849380.0, + "step": 1500 + }, + { + "entropy": 0.29304657004773615, + "epoch": 4.144578313253012, + "grad_norm": 0.5317501425743103, + "learning_rate": 0.00021868805189738066, + "loss": 0.2386075210571289, + "mean_token_accuracy": 0.9206075271964074, + "num_tokens": 3976694.0, + "step": 1550 + }, + { + "entropy": 0.2991082117706537, + "epoch": 4.278447121820616, + "grad_norm": 0.5011683106422424, + "learning_rate": 0.00021238208772105958, + "loss": 0.2479239273071289, + "mean_token_accuracy": 0.9165873003005981, + "num_tokens": 4110204.0, + "step": 1600 + }, + { + "entropy": 0.2965650236606598, + "epoch": 4.412315930388219, + "grad_norm": 0.45324602723121643, + "learning_rate": 0.00020594059806617674, + "loss": 0.24801044464111327, + "mean_token_accuracy": 0.9172127342224121, + "num_tokens": 4240883.0, + "step": 1650 + }, + { + "entropy": 0.30350560761988166, + "epoch": 4.546184738955823, + "grad_norm": 0.4740602970123291, + "learning_rate": 0.00019937760845451308, + "loss": 0.2523613929748535, + "mean_token_accuracy": 0.9163035461306572, + "num_tokens": 4365381.0, + "step": 1700 + }, + { + "entropy": 0.3076958200335503, + "epoch": 4.680053547523427, + "grad_norm": 0.4616807699203491, + "learning_rate": 0.00019270740895850172, + "loss": 0.25745229721069335, + "mean_token_accuracy": 0.9147638303041458, + "num_tokens": 4490711.0, + "step": 1750 + }, + { + "entropy": 0.3103550442308187, + "epoch": 4.813922356091031, + "grad_norm": 0.5542571544647217, + "learning_rate": 0.00018594452308641905, + "loss": 0.2601847457885742, + "mean_token_accuracy": 0.9133006593585015, + "num_tokens": 4618532.0, + "step": 1800 + }, + { + "entropy": 0.30610986322164535, + "epoch": 4.947791164658635, + "grad_norm": 0.43830257654190063, + "learning_rate": 0.00017910367615929835, + "loss": 0.25211355209350583, + "mean_token_accuracy": 0.9155091819167137, + "num_tokens": 4752261.0, + "step": 1850 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3738320705294609, + "eval_loss": 0.6202279925346375, + "eval_mean_token_accuracy": 0.8485285672545433, + "eval_num_tokens": 4798715.0, + "eval_runtime": 52.1176, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.837, + "step": 1870 + }, + { + "entropy": 0.24846129032849062, + "epoch": 5.080321285140562, + "grad_norm": 0.3632349371910095, + "learning_rate": 0.00017219976324842225, + "loss": 0.1933494758605957, + "mean_token_accuracy": 0.935125366906927, + "num_tokens": 4879271.0, + "step": 1900 + }, + { + "entropy": 0.211940533593297, + "epoch": 5.214190093708166, + "grad_norm": 0.5480105876922607, + "learning_rate": 0.00016524781674320582, + "loss": 0.15619863510131837, + "mean_token_accuracy": 0.9460694769024849, + "num_tokens": 5005076.0, + "step": 1950 + }, + { + "entropy": 0.214038780964911, + "epoch": 5.34805890227577, + "grad_norm": 0.5447636842727661, + "learning_rate": 0.00015826297362008662, + "loss": 0.15696516990661621, + "mean_token_accuracy": 0.9461162313818932, + "num_tokens": 5133591.0, + "step": 2000 + }, + { + "entropy": 0.21759917587041855, + "epoch": 5.481927710843373, + "grad_norm": 0.48160865902900696, + "learning_rate": 0.00015126044248368997, + "loss": 0.162998046875, + "mean_token_accuracy": 0.9450593250989914, + "num_tokens": 5257252.0, + "step": 2050 + }, + { + "entropy": 0.21842033743858338, + "epoch": 5.615796519410977, + "grad_norm": 0.5399336218833923, + "learning_rate": 0.00014425547045203324, + "loss": 0.1642344856262207, + "mean_token_accuracy": 0.9428097534179688, + "num_tokens": 5388652.0, + "step": 2100 + }, + { + "entropy": 0.21970326244831084, + "epoch": 5.749665327978581, + "grad_norm": 0.6201577186584473, + "learning_rate": 0.00013726330995787156, + "loss": 0.16450761795043944, + "mean_token_accuracy": 0.9430688858032227, + "num_tokens": 5515488.0, + "step": 2150 + }, + { + "entropy": 0.2100747512280941, + "epoch": 5.883534136546185, + "grad_norm": 0.5263391137123108, + "learning_rate": 0.00013029918553847215, + "loss": 0.15882587432861328, + "mean_token_accuracy": 0.9454078593850136, + "num_tokens": 5644323.0, + "step": 2200 + }, + { + "epoch": 6.0, + "eval_entropy": 0.30004522860050203, + "eval_loss": 0.710449755191803, + "eval_mean_token_accuracy": 0.8465994608402252, + "eval_num_tokens": 5758458.0, + "eval_runtime": 52.1136, + "eval_samples_per_second": 30.683, + "eval_steps_per_second": 3.838, + "step": 2244 + }, + { + "entropy": 0.21408834458902629, + "epoch": 6.016064257028113, + "grad_norm": 0.30019697546958923, + "learning_rate": 0.00012337826068612668, + "loss": 0.15355757713317872, + "mean_token_accuracy": 0.9468493735549426, + "num_tokens": 5773027.0, + "step": 2250 + }, + { + "entropy": 0.14564846321940422, + "epoch": 6.149933065595716, + "grad_norm": 0.44614729285240173, + "learning_rate": 0.00011651560483158201, + "loss": 0.09214784622192383, + "mean_token_accuracy": 0.9686754134297371, + "num_tokens": 5904840.0, + "step": 2300 + }, + { + "entropy": 0.140359299890697, + "epoch": 6.28380187416332, + "grad_norm": 0.5046149492263794, + "learning_rate": 0.00010972616053227751, + "loss": 0.09080178260803223, + "mean_token_accuracy": 0.9693775433301925, + "num_tokens": 6037457.0, + "step": 2350 + }, + { + "entropy": 0.15537990894168616, + "epoch": 6.417670682730924, + "grad_norm": 0.4861410856246948, + "learning_rate": 0.00010302471093683278, + "loss": 0.09675224304199219, + "mean_token_accuracy": 0.9666234213113785, + "num_tokens": 6161042.0, + "step": 2400 + }, + { + "entropy": 0.15113401643931865, + "epoch": 6.551539491298527, + "grad_norm": 0.40878698229789734, + "learning_rate": 9.642584759662807e-05, + "loss": 0.09406195640563965, + "mean_token_accuracy": 0.967639978826046, + "num_tokens": 6291748.0, + "step": 2450 + }, + { + "entropy": 0.15042921423912048, + "epoch": 6.685408299866131, + "grad_norm": 0.46771717071533203, + "learning_rate": 8.994393869456307e-05, + "loss": 0.09585455894470214, + "mean_token_accuracy": 0.9676038774847985, + "num_tokens": 6419265.0, + "step": 2500 + }, + { + "entropy": 0.1505335782468319, + "epoch": 6.8192771084337345, + "grad_norm": 0.38982564210891724, + "learning_rate": 8.359309776017181e-05, + "loss": 0.09631505012512206, + "mean_token_accuracy": 0.9675487798452377, + "num_tokens": 6544758.0, + "step": 2550 + }, + { + "entropy": 0.15015050683170558, + "epoch": 6.953145917001339, + "grad_norm": 0.3234366774559021, + "learning_rate": 7.738715293921322e-05, + "loss": 0.09446552276611328, + "mean_token_accuracy": 0.9673746883869171, + "num_tokens": 6674626.0, + "step": 2600 + }, + { + "epoch": 7.0, + "eval_entropy": 0.23365802489221096, + "eval_loss": 0.8532201647758484, + "eval_mean_token_accuracy": 0.8434653553366661, + "eval_num_tokens": 6718201.0, + "eval_runtime": 52.1492, + "eval_samples_per_second": 30.662, + "eval_steps_per_second": 3.835, + "step": 2618 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9799506495148134e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9322240d24056e758426db91facd16e9b72fa546 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-2992/trainer_state.json @@ -0,0 +1,712 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 2992, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + }, + { + "entropy": 0.39569308842071377, + "epoch": 4.010709504685408, + "grad_norm": 0.40932103991508484, + "learning_rate": 0.0002248447601627953, + "loss": 0.3418621063232422, + "mean_token_accuracy": 0.8925740992782092, + "num_tokens": 3849380.0, + "step": 1500 + }, + { + "entropy": 0.29304657004773615, + "epoch": 4.144578313253012, + "grad_norm": 0.5317501425743103, + "learning_rate": 0.00021868805189738066, + "loss": 0.2386075210571289, + "mean_token_accuracy": 0.9206075271964074, + "num_tokens": 3976694.0, + "step": 1550 + }, + { + "entropy": 0.2991082117706537, + "epoch": 4.278447121820616, + "grad_norm": 0.5011683106422424, + "learning_rate": 0.00021238208772105958, + "loss": 0.2479239273071289, + "mean_token_accuracy": 0.9165873003005981, + "num_tokens": 4110204.0, + "step": 1600 + }, + { + "entropy": 0.2965650236606598, + "epoch": 4.412315930388219, + "grad_norm": 0.45324602723121643, + "learning_rate": 0.00020594059806617674, + "loss": 0.24801044464111327, + "mean_token_accuracy": 0.9172127342224121, + "num_tokens": 4240883.0, + "step": 1650 + }, + { + "entropy": 0.30350560761988166, + "epoch": 4.546184738955823, + "grad_norm": 0.4740602970123291, + "learning_rate": 0.00019937760845451308, + "loss": 0.2523613929748535, + "mean_token_accuracy": 0.9163035461306572, + "num_tokens": 4365381.0, + "step": 1700 + }, + { + "entropy": 0.3076958200335503, + "epoch": 4.680053547523427, + "grad_norm": 0.4616807699203491, + "learning_rate": 0.00019270740895850172, + "loss": 0.25745229721069335, + "mean_token_accuracy": 0.9147638303041458, + "num_tokens": 4490711.0, + "step": 1750 + }, + { + "entropy": 0.3103550442308187, + "epoch": 4.813922356091031, + "grad_norm": 0.5542571544647217, + "learning_rate": 0.00018594452308641905, + "loss": 0.2601847457885742, + "mean_token_accuracy": 0.9133006593585015, + "num_tokens": 4618532.0, + "step": 1800 + }, + { + "entropy": 0.30610986322164535, + "epoch": 4.947791164658635, + "grad_norm": 0.43830257654190063, + "learning_rate": 0.00017910367615929835, + "loss": 0.25211355209350583, + "mean_token_accuracy": 0.9155091819167137, + "num_tokens": 4752261.0, + "step": 1850 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3738320705294609, + "eval_loss": 0.6202279925346375, + "eval_mean_token_accuracy": 0.8485285672545433, + "eval_num_tokens": 4798715.0, + "eval_runtime": 52.1176, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.837, + "step": 1870 + }, + { + "entropy": 0.24846129032849062, + "epoch": 5.080321285140562, + "grad_norm": 0.3632349371910095, + "learning_rate": 0.00017219976324842225, + "loss": 0.1933494758605957, + "mean_token_accuracy": 0.935125366906927, + "num_tokens": 4879271.0, + "step": 1900 + }, + { + "entropy": 0.211940533593297, + "epoch": 5.214190093708166, + "grad_norm": 0.5480105876922607, + "learning_rate": 0.00016524781674320582, + "loss": 0.15619863510131837, + "mean_token_accuracy": 0.9460694769024849, + "num_tokens": 5005076.0, + "step": 1950 + }, + { + "entropy": 0.214038780964911, + "epoch": 5.34805890227577, + "grad_norm": 0.5447636842727661, + "learning_rate": 0.00015826297362008662, + "loss": 0.15696516990661621, + "mean_token_accuracy": 0.9461162313818932, + "num_tokens": 5133591.0, + "step": 2000 + }, + { + "entropy": 0.21759917587041855, + "epoch": 5.481927710843373, + "grad_norm": 0.48160865902900696, + "learning_rate": 0.00015126044248368997, + "loss": 0.162998046875, + "mean_token_accuracy": 0.9450593250989914, + "num_tokens": 5257252.0, + "step": 2050 + }, + { + "entropy": 0.21842033743858338, + "epoch": 5.615796519410977, + "grad_norm": 0.5399336218833923, + "learning_rate": 0.00014425547045203324, + "loss": 0.1642344856262207, + "mean_token_accuracy": 0.9428097534179688, + "num_tokens": 5388652.0, + "step": 2100 + }, + { + "entropy": 0.21970326244831084, + "epoch": 5.749665327978581, + "grad_norm": 0.6201577186584473, + "learning_rate": 0.00013726330995787156, + "loss": 0.16450761795043944, + "mean_token_accuracy": 0.9430688858032227, + "num_tokens": 5515488.0, + "step": 2150 + }, + { + "entropy": 0.2100747512280941, + "epoch": 5.883534136546185, + "grad_norm": 0.5263391137123108, + "learning_rate": 0.00013029918553847215, + "loss": 0.15882587432861328, + "mean_token_accuracy": 0.9454078593850136, + "num_tokens": 5644323.0, + "step": 2200 + }, + { + "epoch": 6.0, + "eval_entropy": 0.30004522860050203, + "eval_loss": 0.710449755191803, + "eval_mean_token_accuracy": 0.8465994608402252, + "eval_num_tokens": 5758458.0, + "eval_runtime": 52.1136, + "eval_samples_per_second": 30.683, + "eval_steps_per_second": 3.838, + "step": 2244 + }, + { + "entropy": 0.21408834458902629, + "epoch": 6.016064257028113, + "grad_norm": 0.30019697546958923, + "learning_rate": 0.00012337826068612668, + "loss": 0.15355757713317872, + "mean_token_accuracy": 0.9468493735549426, + "num_tokens": 5773027.0, + "step": 2250 + }, + { + "entropy": 0.14564846321940422, + "epoch": 6.149933065595716, + "grad_norm": 0.44614729285240173, + "learning_rate": 0.00011651560483158201, + "loss": 0.09214784622192383, + "mean_token_accuracy": 0.9686754134297371, + "num_tokens": 5904840.0, + "step": 2300 + }, + { + "entropy": 0.140359299890697, + "epoch": 6.28380187416332, + "grad_norm": 0.5046149492263794, + "learning_rate": 0.00010972616053227751, + "loss": 0.09080178260803223, + "mean_token_accuracy": 0.9693775433301925, + "num_tokens": 6037457.0, + "step": 2350 + }, + { + "entropy": 0.15537990894168616, + "epoch": 6.417670682730924, + "grad_norm": 0.4861410856246948, + "learning_rate": 0.00010302471093683278, + "loss": 0.09675224304199219, + "mean_token_accuracy": 0.9666234213113785, + "num_tokens": 6161042.0, + "step": 2400 + }, + { + "entropy": 0.15113401643931865, + "epoch": 6.551539491298527, + "grad_norm": 0.40878698229789734, + "learning_rate": 9.642584759662807e-05, + "loss": 0.09406195640563965, + "mean_token_accuracy": 0.967639978826046, + "num_tokens": 6291748.0, + "step": 2450 + }, + { + "entropy": 0.15042921423912048, + "epoch": 6.685408299866131, + "grad_norm": 0.46771717071533203, + "learning_rate": 8.994393869456307e-05, + "loss": 0.09585455894470214, + "mean_token_accuracy": 0.9676038774847985, + "num_tokens": 6419265.0, + "step": 2500 + }, + { + "entropy": 0.1505335782468319, + "epoch": 6.8192771084337345, + "grad_norm": 0.38982564210891724, + "learning_rate": 8.359309776017181e-05, + "loss": 0.09631505012512206, + "mean_token_accuracy": 0.9675487798452377, + "num_tokens": 6544758.0, + "step": 2550 + }, + { + "entropy": 0.15015050683170558, + "epoch": 6.953145917001339, + "grad_norm": 0.3234366774559021, + "learning_rate": 7.738715293921322e-05, + "loss": 0.09446552276611328, + "mean_token_accuracy": 0.9673746883869171, + "num_tokens": 6674626.0, + "step": 2600 + }, + { + "epoch": 7.0, + "eval_entropy": 0.23365802489221096, + "eval_loss": 0.8532201647758484, + "eval_mean_token_accuracy": 0.8434653553366661, + "eval_num_tokens": 6718201.0, + "eval_runtime": 52.1492, + "eval_samples_per_second": 30.662, + "eval_steps_per_second": 3.835, + "step": 2618 + }, + { + "entropy": 0.1327564088307848, + "epoch": 7.085676037483267, + "grad_norm": 0.2602083086967468, + "learning_rate": 7.133961688464825e-05, + "loss": 0.07557847023010254, + "mean_token_accuracy": 0.9734747448954919, + "num_tokens": 6803830.0, + "step": 2650 + }, + { + "entropy": 0.11478025399148464, + "epoch": 7.21954484605087, + "grad_norm": 0.29537495970726013, + "learning_rate": 6.546365733456209e-05, + "loss": 0.06565972328186036, + "mean_token_accuracy": 0.977358937561512, + "num_tokens": 6936289.0, + "step": 2700 + }, + { + "entropy": 0.126174540463835, + "epoch": 7.353413654618474, + "grad_norm": 0.3025906980037689, + "learning_rate": 5.9772068441094775e-05, + "loss": 0.06768707275390624, + "mean_token_accuracy": 0.9757491773366929, + "num_tokens": 7057823.0, + "step": 2750 + }, + { + "entropy": 0.11537095734849573, + "epoch": 7.4872824631860775, + "grad_norm": 0.22307702898979187, + "learning_rate": 5.427724291280762e-05, + "loss": 0.0664353084564209, + "mean_token_accuracy": 0.9773979318141938, + "num_tokens": 7187020.0, + "step": 2800 + }, + { + "entropy": 0.11154996948316694, + "epoch": 7.621151271753681, + "grad_norm": 0.4059358239173889, + "learning_rate": 4.8991145031141555e-05, + "loss": 0.06321117877960206, + "mean_token_accuracy": 0.9781053271889687, + "num_tokens": 7323457.0, + "step": 2850 + }, + { + "entropy": 0.11587916240096093, + "epoch": 7.755020080321285, + "grad_norm": 0.185866579413414, + "learning_rate": 4.392528459972073e-05, + "loss": 0.06652077674865722, + "mean_token_accuracy": 0.9772412911057472, + "num_tokens": 7451765.0, + "step": 2900 + }, + { + "entropy": 0.11466571116819978, + "epoch": 7.888888888888889, + "grad_norm": 0.25750523805618286, + "learning_rate": 3.909069188322406e-05, + "loss": 0.06799327373504639, + "mean_token_accuracy": 0.9771603578329087, + "num_tokens": 7578222.0, + "step": 2950 + }, + { + "epoch": 8.0, + "eval_entropy": 0.2060005297511816, + "eval_loss": 0.955081045627594, + "eval_mean_token_accuracy": 0.847015127837658, + "eval_num_tokens": 7677944.0, + "eval_runtime": 52.1277, + "eval_samples_per_second": 30.675, + "eval_steps_per_second": 3.837, + "step": 2992 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.4083692178899866e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9026a11fa60140c77e78569d546724b33b09bb8a --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3366/trainer_state.json @@ -0,0 +1,803 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 3366, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + }, + { + "entropy": 0.39569308842071377, + "epoch": 4.010709504685408, + "grad_norm": 0.40932103991508484, + "learning_rate": 0.0002248447601627953, + "loss": 0.3418621063232422, + "mean_token_accuracy": 0.8925740992782092, + "num_tokens": 3849380.0, + "step": 1500 + }, + { + "entropy": 0.29304657004773615, + "epoch": 4.144578313253012, + "grad_norm": 0.5317501425743103, + "learning_rate": 0.00021868805189738066, + "loss": 0.2386075210571289, + "mean_token_accuracy": 0.9206075271964074, + "num_tokens": 3976694.0, + "step": 1550 + }, + { + "entropy": 0.2991082117706537, + "epoch": 4.278447121820616, + "grad_norm": 0.5011683106422424, + "learning_rate": 0.00021238208772105958, + "loss": 0.2479239273071289, + "mean_token_accuracy": 0.9165873003005981, + "num_tokens": 4110204.0, + "step": 1600 + }, + { + "entropy": 0.2965650236606598, + "epoch": 4.412315930388219, + "grad_norm": 0.45324602723121643, + "learning_rate": 0.00020594059806617674, + "loss": 0.24801044464111327, + "mean_token_accuracy": 0.9172127342224121, + "num_tokens": 4240883.0, + "step": 1650 + }, + { + "entropy": 0.30350560761988166, + "epoch": 4.546184738955823, + "grad_norm": 0.4740602970123291, + "learning_rate": 0.00019937760845451308, + "loss": 0.2523613929748535, + "mean_token_accuracy": 0.9163035461306572, + "num_tokens": 4365381.0, + "step": 1700 + }, + { + "entropy": 0.3076958200335503, + "epoch": 4.680053547523427, + "grad_norm": 0.4616807699203491, + "learning_rate": 0.00019270740895850172, + "loss": 0.25745229721069335, + "mean_token_accuracy": 0.9147638303041458, + "num_tokens": 4490711.0, + "step": 1750 + }, + { + "entropy": 0.3103550442308187, + "epoch": 4.813922356091031, + "grad_norm": 0.5542571544647217, + "learning_rate": 0.00018594452308641905, + "loss": 0.2601847457885742, + "mean_token_accuracy": 0.9133006593585015, + "num_tokens": 4618532.0, + "step": 1800 + }, + { + "entropy": 0.30610986322164535, + "epoch": 4.947791164658635, + "grad_norm": 0.43830257654190063, + "learning_rate": 0.00017910367615929835, + "loss": 0.25211355209350583, + "mean_token_accuracy": 0.9155091819167137, + "num_tokens": 4752261.0, + "step": 1850 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3738320705294609, + "eval_loss": 0.6202279925346375, + "eval_mean_token_accuracy": 0.8485285672545433, + "eval_num_tokens": 4798715.0, + "eval_runtime": 52.1176, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.837, + "step": 1870 + }, + { + "entropy": 0.24846129032849062, + "epoch": 5.080321285140562, + "grad_norm": 0.3632349371910095, + "learning_rate": 0.00017219976324842225, + "loss": 0.1933494758605957, + "mean_token_accuracy": 0.935125366906927, + "num_tokens": 4879271.0, + "step": 1900 + }, + { + "entropy": 0.211940533593297, + "epoch": 5.214190093708166, + "grad_norm": 0.5480105876922607, + "learning_rate": 0.00016524781674320582, + "loss": 0.15619863510131837, + "mean_token_accuracy": 0.9460694769024849, + "num_tokens": 5005076.0, + "step": 1950 + }, + { + "entropy": 0.214038780964911, + "epoch": 5.34805890227577, + "grad_norm": 0.5447636842727661, + "learning_rate": 0.00015826297362008662, + "loss": 0.15696516990661621, + "mean_token_accuracy": 0.9461162313818932, + "num_tokens": 5133591.0, + "step": 2000 + }, + { + "entropy": 0.21759917587041855, + "epoch": 5.481927710843373, + "grad_norm": 0.48160865902900696, + "learning_rate": 0.00015126044248368997, + "loss": 0.162998046875, + "mean_token_accuracy": 0.9450593250989914, + "num_tokens": 5257252.0, + "step": 2050 + }, + { + "entropy": 0.21842033743858338, + "epoch": 5.615796519410977, + "grad_norm": 0.5399336218833923, + "learning_rate": 0.00014425547045203324, + "loss": 0.1642344856262207, + "mean_token_accuracy": 0.9428097534179688, + "num_tokens": 5388652.0, + "step": 2100 + }, + { + "entropy": 0.21970326244831084, + "epoch": 5.749665327978581, + "grad_norm": 0.6201577186584473, + "learning_rate": 0.00013726330995787156, + "loss": 0.16450761795043944, + "mean_token_accuracy": 0.9430688858032227, + "num_tokens": 5515488.0, + "step": 2150 + }, + { + "entropy": 0.2100747512280941, + "epoch": 5.883534136546185, + "grad_norm": 0.5263391137123108, + "learning_rate": 0.00013029918553847215, + "loss": 0.15882587432861328, + "mean_token_accuracy": 0.9454078593850136, + "num_tokens": 5644323.0, + "step": 2200 + }, + { + "epoch": 6.0, + "eval_entropy": 0.30004522860050203, + "eval_loss": 0.710449755191803, + "eval_mean_token_accuracy": 0.8465994608402252, + "eval_num_tokens": 5758458.0, + "eval_runtime": 52.1136, + "eval_samples_per_second": 30.683, + "eval_steps_per_second": 3.838, + "step": 2244 + }, + { + "entropy": 0.21408834458902629, + "epoch": 6.016064257028113, + "grad_norm": 0.30019697546958923, + "learning_rate": 0.00012337826068612668, + "loss": 0.15355757713317872, + "mean_token_accuracy": 0.9468493735549426, + "num_tokens": 5773027.0, + "step": 2250 + }, + { + "entropy": 0.14564846321940422, + "epoch": 6.149933065595716, + "grad_norm": 0.44614729285240173, + "learning_rate": 0.00011651560483158201, + "loss": 0.09214784622192383, + "mean_token_accuracy": 0.9686754134297371, + "num_tokens": 5904840.0, + "step": 2300 + }, + { + "entropy": 0.140359299890697, + "epoch": 6.28380187416332, + "grad_norm": 0.5046149492263794, + "learning_rate": 0.00010972616053227751, + "loss": 0.09080178260803223, + "mean_token_accuracy": 0.9693775433301925, + "num_tokens": 6037457.0, + "step": 2350 + }, + { + "entropy": 0.15537990894168616, + "epoch": 6.417670682730924, + "grad_norm": 0.4861410856246948, + "learning_rate": 0.00010302471093683278, + "loss": 0.09675224304199219, + "mean_token_accuracy": 0.9666234213113785, + "num_tokens": 6161042.0, + "step": 2400 + }, + { + "entropy": 0.15113401643931865, + "epoch": 6.551539491298527, + "grad_norm": 0.40878698229789734, + "learning_rate": 9.642584759662807e-05, + "loss": 0.09406195640563965, + "mean_token_accuracy": 0.967639978826046, + "num_tokens": 6291748.0, + "step": 2450 + }, + { + "entropy": 0.15042921423912048, + "epoch": 6.685408299866131, + "grad_norm": 0.46771717071533203, + "learning_rate": 8.994393869456307e-05, + "loss": 0.09585455894470214, + "mean_token_accuracy": 0.9676038774847985, + "num_tokens": 6419265.0, + "step": 2500 + }, + { + "entropy": 0.1505335782468319, + "epoch": 6.8192771084337345, + "grad_norm": 0.38982564210891724, + "learning_rate": 8.359309776017181e-05, + "loss": 0.09631505012512206, + "mean_token_accuracy": 0.9675487798452377, + "num_tokens": 6544758.0, + "step": 2550 + }, + { + "entropy": 0.15015050683170558, + "epoch": 6.953145917001339, + "grad_norm": 0.3234366774559021, + "learning_rate": 7.738715293921322e-05, + "loss": 0.09446552276611328, + "mean_token_accuracy": 0.9673746883869171, + "num_tokens": 6674626.0, + "step": 2600 + }, + { + "epoch": 7.0, + "eval_entropy": 0.23365802489221096, + "eval_loss": 0.8532201647758484, + "eval_mean_token_accuracy": 0.8434653553366661, + "eval_num_tokens": 6718201.0, + "eval_runtime": 52.1492, + "eval_samples_per_second": 30.662, + "eval_steps_per_second": 3.835, + "step": 2618 + }, + { + "entropy": 0.1327564088307848, + "epoch": 7.085676037483267, + "grad_norm": 0.2602083086967468, + "learning_rate": 7.133961688464825e-05, + "loss": 0.07557847023010254, + "mean_token_accuracy": 0.9734747448954919, + "num_tokens": 6803830.0, + "step": 2650 + }, + { + "entropy": 0.11478025399148464, + "epoch": 7.21954484605087, + "grad_norm": 0.29537495970726013, + "learning_rate": 6.546365733456209e-05, + "loss": 0.06565972328186036, + "mean_token_accuracy": 0.977358937561512, + "num_tokens": 6936289.0, + "step": 2700 + }, + { + "entropy": 0.126174540463835, + "epoch": 7.353413654618474, + "grad_norm": 0.3025906980037689, + "learning_rate": 5.9772068441094775e-05, + "loss": 0.06768707275390624, + "mean_token_accuracy": 0.9757491773366929, + "num_tokens": 7057823.0, + "step": 2750 + }, + { + "entropy": 0.11537095734849573, + "epoch": 7.4872824631860775, + "grad_norm": 0.22307702898979187, + "learning_rate": 5.427724291280762e-05, + "loss": 0.0664353084564209, + "mean_token_accuracy": 0.9773979318141938, + "num_tokens": 7187020.0, + "step": 2800 + }, + { + "entropy": 0.11154996948316694, + "epoch": 7.621151271753681, + "grad_norm": 0.4059358239173889, + "learning_rate": 4.8991145031141555e-05, + "loss": 0.06321117877960206, + "mean_token_accuracy": 0.9781053271889687, + "num_tokens": 7323457.0, + "step": 2850 + }, + { + "entropy": 0.11587916240096093, + "epoch": 7.755020080321285, + "grad_norm": 0.185866579413414, + "learning_rate": 4.392528459972073e-05, + "loss": 0.06652077674865722, + "mean_token_accuracy": 0.9772412911057472, + "num_tokens": 7451765.0, + "step": 2900 + }, + { + "entropy": 0.11466571116819978, + "epoch": 7.888888888888889, + "grad_norm": 0.25750523805618286, + "learning_rate": 3.909069188322406e-05, + "loss": 0.06799327373504639, + "mean_token_accuracy": 0.9771603578329087, + "num_tokens": 7578222.0, + "step": 2950 + }, + { + "epoch": 8.0, + "eval_entropy": 0.2060005297511816, + "eval_loss": 0.955081045627594, + "eval_mean_token_accuracy": 0.847015127837658, + "eval_num_tokens": 7677944.0, + "eval_runtime": 52.1277, + "eval_samples_per_second": 30.675, + "eval_steps_per_second": 3.837, + "step": 2992 + }, + { + "entropy": 0.11911658171300936, + "epoch": 8.021419009370817, + "grad_norm": 0.11696725338697433, + "learning_rate": 3.449789359039139e-05, + "loss": 0.06788389205932617, + "mean_token_accuracy": 0.9765385636175522, + "num_tokens": 7699511.0, + "step": 3000 + }, + { + "entropy": 0.11421961288899184, + "epoch": 8.15528781793842, + "grad_norm": 0.14741645753383636, + "learning_rate": 3.015688995345818e-05, + "loss": 0.05964169025421143, + "mean_token_accuracy": 0.9789826035499573, + "num_tokens": 7822731.0, + "step": 3050 + }, + { + "entropy": 0.10774534512311221, + "epoch": 8.289156626506024, + "grad_norm": 0.09522435814142227, + "learning_rate": 2.6077132953926014e-05, + "loss": 0.055038743019104004, + "mean_token_accuracy": 0.9793784576654434, + "num_tokens": 7954237.0, + "step": 3100 + }, + { + "entropy": 0.10264276895672082, + "epoch": 8.423025435073628, + "grad_norm": 0.1083206757903099, + "learning_rate": 2.2267505742079047e-05, + "loss": 0.05623414993286133, + "mean_token_accuracy": 0.9798879814147949, + "num_tokens": 8082999.0, + "step": 3150 + }, + { + "entropy": 0.10384491421282291, + "epoch": 8.556894243641231, + "grad_norm": 0.1440686285495758, + "learning_rate": 1.8736303295057585e-05, + "loss": 0.05598822593688965, + "mean_token_accuracy": 0.9789952409267425, + "num_tokens": 8214329.0, + "step": 3200 + }, + { + "entropy": 0.1028516049310565, + "epoch": 8.690763052208835, + "grad_norm": 0.07812497019767761, + "learning_rate": 1.5491214355603893e-05, + "loss": 0.05675966739654541, + "mean_token_accuracy": 0.9799879723787308, + "num_tokens": 8345134.0, + "step": 3250 + }, + { + "entropy": 0.10926607897505164, + "epoch": 8.824631860776439, + "grad_norm": 0.09217273443937302, + "learning_rate": 1.2539304690806058e-05, + "loss": 0.05895336627960205, + "mean_token_accuracy": 0.9787288227677345, + "num_tokens": 8470788.0, + "step": 3300 + }, + { + "entropy": 0.10923314603045582, + "epoch": 8.958500669344042, + "grad_norm": 0.09998416155576706, + "learning_rate": 9.88700170729155e-06, + "loss": 0.05717726707458496, + "mean_token_accuracy": 0.9784420201182366, + "num_tokens": 8599154.0, + "step": 3350 + }, + { + "epoch": 9.0, + "eval_entropy": 0.18996331751346587, + "eval_loss": 1.0452677011489868, + "eval_mean_token_accuracy": 0.8471912437677384, + "eval_num_tokens": 8637687.0, + "eval_runtime": 52.1069, + "eval_samples_per_second": 30.687, + "eval_steps_per_second": 3.838, + "step": 3366 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.832809739463547e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8406f29c9d15fdcc49a8bfbb2d78741bdb118578 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-374/trainer_state.json @@ -0,0 +1,115 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 374, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.233439277197824e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..72fc51564fde08732bb7911c3a922c0284429781 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-3740/trainer_state.json @@ -0,0 +1,884 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 3740, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + }, + { + "entropy": 0.535869851106345, + "epoch": 2.005354752342704, + "grad_norm": 0.41517460346221924, + "learning_rate": 0.00029117632585550326, + "loss": 0.4852032470703125, + "mean_token_accuracy": 0.8596673321844351, + "num_tokens": 1925307.0, + "step": 750 + }, + { + "entropy": 0.447470418959856, + "epoch": 2.139223560910308, + "grad_norm": 0.3979392349720001, + "learning_rate": 0.00028862114829292755, + "loss": 0.4028204345703125, + "mean_token_accuracy": 0.8767653766274452, + "num_tokens": 2055627.0, + "step": 800 + }, + { + "entropy": 0.4631192748993635, + "epoch": 2.2730923694779115, + "grad_norm": 0.5497493147850037, + "learning_rate": 0.0002857644444255997, + "loss": 0.41807849884033205, + "mean_token_accuracy": 0.8721370902657509, + "num_tokens": 2177850.0, + "step": 850 + }, + { + "entropy": 0.4618495012819767, + "epoch": 2.4069611780455156, + "grad_norm": 0.5762711763381958, + "learning_rate": 0.00028261243436167164, + "loss": 0.4263508987426758, + "mean_token_accuracy": 0.8728304222226143, + "num_tokens": 2304612.0, + "step": 900 + }, + { + "entropy": 0.46360296294093134, + "epoch": 2.540829986613119, + "grad_norm": 0.4397684335708618, + "learning_rate": 0.00027917198120085693, + "loss": 0.4204945373535156, + "mean_token_accuracy": 0.8729385876655579, + "num_tokens": 2430946.0, + "step": 950 + }, + { + "entropy": 0.4606146043539047, + "epoch": 2.674698795180723, + "grad_norm": 0.49730950593948364, + "learning_rate": 0.0002754505760909068, + "loss": 0.41891841888427733, + "mean_token_accuracy": 0.8735697677731514, + "num_tokens": 2557241.0, + "step": 1000 + }, + { + "entropy": 0.4599393020570278, + "epoch": 2.8085676037483265, + "grad_norm": 0.3867562711238861, + "learning_rate": 0.00027145632191659207, + "loss": 0.4164935302734375, + "mean_token_accuracy": 0.8750462782382965, + "num_tokens": 2692775.0, + "step": 1050 + }, + { + "entropy": 0.4657205778360367, + "epoch": 2.9424364123159306, + "grad_norm": 0.3775743246078491, + "learning_rate": 0.0002671979156567051, + "loss": 0.42220756530761716, + "mean_token_accuracy": 0.8735427415370941, + "num_tokens": 2823060.0, + "step": 1100 + }, + { + "epoch": 3.0, + "eval_entropy": 0.49825115099549294, + "eval_loss": 0.562646746635437, + "eval_mean_token_accuracy": 0.8420541244745254, + "eval_num_tokens": 2879229.0, + "eval_runtime": 52.1338, + "eval_samples_per_second": 30.671, + "eval_steps_per_second": 3.836, + "step": 1122 + }, + { + "entropy": 0.4122138220553446, + "epoch": 3.074966532797858, + "grad_norm": 0.4913100302219391, + "learning_rate": 0.0002626846294474985, + "loss": 0.3638055419921875, + "mean_token_accuracy": 0.8879981071057946, + "num_tokens": 2955424.0, + "step": 1150 + }, + { + "entropy": 0.38473270788788794, + "epoch": 3.208835341365462, + "grad_norm": 0.5340412259101868, + "learning_rate": 0.00025792629039379165, + "loss": 0.3335049438476563, + "mean_token_accuracy": 0.8928974050283432, + "num_tokens": 3078799.0, + "step": 1200 + }, + { + "entropy": 0.3838996239006519, + "epoch": 3.3427041499330654, + "grad_norm": 0.4576664865016937, + "learning_rate": 0.0002529332591717036, + "loss": 0.3357630157470703, + "mean_token_accuracy": 0.8939724805951118, + "num_tokens": 3211945.0, + "step": 1250 + }, + { + "entropy": 0.3874890775978565, + "epoch": 3.4765729585006695, + "grad_norm": 0.4499485194683075, + "learning_rate": 0.000247716407469603, + "loss": 0.3426588821411133, + "mean_token_accuracy": 0.8922444903850555, + "num_tokens": 3343933.0, + "step": 1300 + }, + { + "entropy": 0.3952706679701805, + "epoch": 3.610441767068273, + "grad_norm": 0.4777601957321167, + "learning_rate": 0.00024228709431639412, + "loss": 0.34926448822021483, + "mean_token_accuracy": 0.8901252299547195, + "num_tokens": 3470087.0, + "step": 1350 + }, + { + "entropy": 0.39983385235071184, + "epoch": 3.7443105756358768, + "grad_norm": 0.36449742317199707, + "learning_rate": 0.00023665714134868044, + "loss": 0.3480478286743164, + "mean_token_accuracy": 0.890526123046875, + "num_tokens": 3590858.0, + "step": 1400 + }, + { + "entropy": 0.3916309730708599, + "epoch": 3.878179384203481, + "grad_norm": 0.37300795316696167, + "learning_rate": 0.00023083880707065973, + "loss": 0.349254150390625, + "mean_token_accuracy": 0.8909555944800377, + "num_tokens": 3725806.0, + "step": 1450 + }, + { + "epoch": 4.0, + "eval_entropy": 0.4629781968891621, + "eval_loss": 0.5623059868812561, + "eval_mean_token_accuracy": 0.8462381008267402, + "eval_num_tokens": 3838972.0, + "eval_runtime": 52.0941, + "eval_samples_per_second": 30.694, + "eval_steps_per_second": 3.839, + "step": 1496 + }, + { + "entropy": 0.39569308842071377, + "epoch": 4.010709504685408, + "grad_norm": 0.40932103991508484, + "learning_rate": 0.0002248447601627953, + "loss": 0.3418621063232422, + "mean_token_accuracy": 0.8925740992782092, + "num_tokens": 3849380.0, + "step": 1500 + }, + { + "entropy": 0.29304657004773615, + "epoch": 4.144578313253012, + "grad_norm": 0.5317501425743103, + "learning_rate": 0.00021868805189738066, + "loss": 0.2386075210571289, + "mean_token_accuracy": 0.9206075271964074, + "num_tokens": 3976694.0, + "step": 1550 + }, + { + "entropy": 0.2991082117706537, + "epoch": 4.278447121820616, + "grad_norm": 0.5011683106422424, + "learning_rate": 0.00021238208772105958, + "loss": 0.2479239273071289, + "mean_token_accuracy": 0.9165873003005981, + "num_tokens": 4110204.0, + "step": 1600 + }, + { + "entropy": 0.2965650236606598, + "epoch": 4.412315930388219, + "grad_norm": 0.45324602723121643, + "learning_rate": 0.00020594059806617674, + "loss": 0.24801044464111327, + "mean_token_accuracy": 0.9172127342224121, + "num_tokens": 4240883.0, + "step": 1650 + }, + { + "entropy": 0.30350560761988166, + "epoch": 4.546184738955823, + "grad_norm": 0.4740602970123291, + "learning_rate": 0.00019937760845451308, + "loss": 0.2523613929748535, + "mean_token_accuracy": 0.9163035461306572, + "num_tokens": 4365381.0, + "step": 1700 + }, + { + "entropy": 0.3076958200335503, + "epoch": 4.680053547523427, + "grad_norm": 0.4616807699203491, + "learning_rate": 0.00019270740895850172, + "loss": 0.25745229721069335, + "mean_token_accuracy": 0.9147638303041458, + "num_tokens": 4490711.0, + "step": 1750 + }, + { + "entropy": 0.3103550442308187, + "epoch": 4.813922356091031, + "grad_norm": 0.5542571544647217, + "learning_rate": 0.00018594452308641905, + "loss": 0.2601847457885742, + "mean_token_accuracy": 0.9133006593585015, + "num_tokens": 4618532.0, + "step": 1800 + }, + { + "entropy": 0.30610986322164535, + "epoch": 4.947791164658635, + "grad_norm": 0.43830257654190063, + "learning_rate": 0.00017910367615929835, + "loss": 0.25211355209350583, + "mean_token_accuracy": 0.9155091819167137, + "num_tokens": 4752261.0, + "step": 1850 + }, + { + "epoch": 5.0, + "eval_entropy": 0.3738320705294609, + "eval_loss": 0.6202279925346375, + "eval_mean_token_accuracy": 0.8485285672545433, + "eval_num_tokens": 4798715.0, + "eval_runtime": 52.1176, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.837, + "step": 1870 + }, + { + "entropy": 0.24846129032849062, + "epoch": 5.080321285140562, + "grad_norm": 0.3632349371910095, + "learning_rate": 0.00017219976324842225, + "loss": 0.1933494758605957, + "mean_token_accuracy": 0.935125366906927, + "num_tokens": 4879271.0, + "step": 1900 + }, + { + "entropy": 0.211940533593297, + "epoch": 5.214190093708166, + "grad_norm": 0.5480105876922607, + "learning_rate": 0.00016524781674320582, + "loss": 0.15619863510131837, + "mean_token_accuracy": 0.9460694769024849, + "num_tokens": 5005076.0, + "step": 1950 + }, + { + "entropy": 0.214038780964911, + "epoch": 5.34805890227577, + "grad_norm": 0.5447636842727661, + "learning_rate": 0.00015826297362008662, + "loss": 0.15696516990661621, + "mean_token_accuracy": 0.9461162313818932, + "num_tokens": 5133591.0, + "step": 2000 + }, + { + "entropy": 0.21759917587041855, + "epoch": 5.481927710843373, + "grad_norm": 0.48160865902900696, + "learning_rate": 0.00015126044248368997, + "loss": 0.162998046875, + "mean_token_accuracy": 0.9450593250989914, + "num_tokens": 5257252.0, + "step": 2050 + }, + { + "entropy": 0.21842033743858338, + "epoch": 5.615796519410977, + "grad_norm": 0.5399336218833923, + "learning_rate": 0.00014425547045203324, + "loss": 0.1642344856262207, + "mean_token_accuracy": 0.9428097534179688, + "num_tokens": 5388652.0, + "step": 2100 + }, + { + "entropy": 0.21970326244831084, + "epoch": 5.749665327978581, + "grad_norm": 0.6201577186584473, + "learning_rate": 0.00013726330995787156, + "loss": 0.16450761795043944, + "mean_token_accuracy": 0.9430688858032227, + "num_tokens": 5515488.0, + "step": 2150 + }, + { + "entropy": 0.2100747512280941, + "epoch": 5.883534136546185, + "grad_norm": 0.5263391137123108, + "learning_rate": 0.00013029918553847215, + "loss": 0.15882587432861328, + "mean_token_accuracy": 0.9454078593850136, + "num_tokens": 5644323.0, + "step": 2200 + }, + { + "epoch": 6.0, + "eval_entropy": 0.30004522860050203, + "eval_loss": 0.710449755191803, + "eval_mean_token_accuracy": 0.8465994608402252, + "eval_num_tokens": 5758458.0, + "eval_runtime": 52.1136, + "eval_samples_per_second": 30.683, + "eval_steps_per_second": 3.838, + "step": 2244 + }, + { + "entropy": 0.21408834458902629, + "epoch": 6.016064257028113, + "grad_norm": 0.30019697546958923, + "learning_rate": 0.00012337826068612668, + "loss": 0.15355757713317872, + "mean_token_accuracy": 0.9468493735549426, + "num_tokens": 5773027.0, + "step": 2250 + }, + { + "entropy": 0.14564846321940422, + "epoch": 6.149933065595716, + "grad_norm": 0.44614729285240173, + "learning_rate": 0.00011651560483158201, + "loss": 0.09214784622192383, + "mean_token_accuracy": 0.9686754134297371, + "num_tokens": 5904840.0, + "step": 2300 + }, + { + "entropy": 0.140359299890697, + "epoch": 6.28380187416332, + "grad_norm": 0.5046149492263794, + "learning_rate": 0.00010972616053227751, + "loss": 0.09080178260803223, + "mean_token_accuracy": 0.9693775433301925, + "num_tokens": 6037457.0, + "step": 2350 + }, + { + "entropy": 0.15537990894168616, + "epoch": 6.417670682730924, + "grad_norm": 0.4861410856246948, + "learning_rate": 0.00010302471093683278, + "loss": 0.09675224304199219, + "mean_token_accuracy": 0.9666234213113785, + "num_tokens": 6161042.0, + "step": 2400 + }, + { + "entropy": 0.15113401643931865, + "epoch": 6.551539491298527, + "grad_norm": 0.40878698229789734, + "learning_rate": 9.642584759662807e-05, + "loss": 0.09406195640563965, + "mean_token_accuracy": 0.967639978826046, + "num_tokens": 6291748.0, + "step": 2450 + }, + { + "entropy": 0.15042921423912048, + "epoch": 6.685408299866131, + "grad_norm": 0.46771717071533203, + "learning_rate": 8.994393869456307e-05, + "loss": 0.09585455894470214, + "mean_token_accuracy": 0.9676038774847985, + "num_tokens": 6419265.0, + "step": 2500 + }, + { + "entropy": 0.1505335782468319, + "epoch": 6.8192771084337345, + "grad_norm": 0.38982564210891724, + "learning_rate": 8.359309776017181e-05, + "loss": 0.09631505012512206, + "mean_token_accuracy": 0.9675487798452377, + "num_tokens": 6544758.0, + "step": 2550 + }, + { + "entropy": 0.15015050683170558, + "epoch": 6.953145917001339, + "grad_norm": 0.3234366774559021, + "learning_rate": 7.738715293921322e-05, + "loss": 0.09446552276611328, + "mean_token_accuracy": 0.9673746883869171, + "num_tokens": 6674626.0, + "step": 2600 + }, + { + "epoch": 7.0, + "eval_entropy": 0.23365802489221096, + "eval_loss": 0.8532201647758484, + "eval_mean_token_accuracy": 0.8434653553366661, + "eval_num_tokens": 6718201.0, + "eval_runtime": 52.1492, + "eval_samples_per_second": 30.662, + "eval_steps_per_second": 3.835, + "step": 2618 + }, + { + "entropy": 0.1327564088307848, + "epoch": 7.085676037483267, + "grad_norm": 0.2602083086967468, + "learning_rate": 7.133961688464825e-05, + "loss": 0.07557847023010254, + "mean_token_accuracy": 0.9734747448954919, + "num_tokens": 6803830.0, + "step": 2650 + }, + { + "entropy": 0.11478025399148464, + "epoch": 7.21954484605087, + "grad_norm": 0.29537495970726013, + "learning_rate": 6.546365733456209e-05, + "loss": 0.06565972328186036, + "mean_token_accuracy": 0.977358937561512, + "num_tokens": 6936289.0, + "step": 2700 + }, + { + "entropy": 0.126174540463835, + "epoch": 7.353413654618474, + "grad_norm": 0.3025906980037689, + "learning_rate": 5.9772068441094775e-05, + "loss": 0.06768707275390624, + "mean_token_accuracy": 0.9757491773366929, + "num_tokens": 7057823.0, + "step": 2750 + }, + { + "entropy": 0.11537095734849573, + "epoch": 7.4872824631860775, + "grad_norm": 0.22307702898979187, + "learning_rate": 5.427724291280762e-05, + "loss": 0.0664353084564209, + "mean_token_accuracy": 0.9773979318141938, + "num_tokens": 7187020.0, + "step": 2800 + }, + { + "entropy": 0.11154996948316694, + "epoch": 7.621151271753681, + "grad_norm": 0.4059358239173889, + "learning_rate": 4.8991145031141555e-05, + "loss": 0.06321117877960206, + "mean_token_accuracy": 0.9781053271889687, + "num_tokens": 7323457.0, + "step": 2850 + }, + { + "entropy": 0.11587916240096093, + "epoch": 7.755020080321285, + "grad_norm": 0.185866579413414, + "learning_rate": 4.392528459972073e-05, + "loss": 0.06652077674865722, + "mean_token_accuracy": 0.9772412911057472, + "num_tokens": 7451765.0, + "step": 2900 + }, + { + "entropy": 0.11466571116819978, + "epoch": 7.888888888888889, + "grad_norm": 0.25750523805618286, + "learning_rate": 3.909069188322406e-05, + "loss": 0.06799327373504639, + "mean_token_accuracy": 0.9771603578329087, + "num_tokens": 7578222.0, + "step": 2950 + }, + { + "epoch": 8.0, + "eval_entropy": 0.2060005297511816, + "eval_loss": 0.955081045627594, + "eval_mean_token_accuracy": 0.847015127837658, + "eval_num_tokens": 7677944.0, + "eval_runtime": 52.1277, + "eval_samples_per_second": 30.675, + "eval_steps_per_second": 3.837, + "step": 2992 + }, + { + "entropy": 0.11911658171300936, + "epoch": 8.021419009370817, + "grad_norm": 0.11696725338697433, + "learning_rate": 3.449789359039139e-05, + "loss": 0.06788389205932617, + "mean_token_accuracy": 0.9765385636175522, + "num_tokens": 7699511.0, + "step": 3000 + }, + { + "entropy": 0.11421961288899184, + "epoch": 8.15528781793842, + "grad_norm": 0.14741645753383636, + "learning_rate": 3.015688995345818e-05, + "loss": 0.05964169025421143, + "mean_token_accuracy": 0.9789826035499573, + "num_tokens": 7822731.0, + "step": 3050 + }, + { + "entropy": 0.10774534512311221, + "epoch": 8.289156626506024, + "grad_norm": 0.09522435814142227, + "learning_rate": 2.6077132953926014e-05, + "loss": 0.055038743019104004, + "mean_token_accuracy": 0.9793784576654434, + "num_tokens": 7954237.0, + "step": 3100 + }, + { + "entropy": 0.10264276895672082, + "epoch": 8.423025435073628, + "grad_norm": 0.1083206757903099, + "learning_rate": 2.2267505742079047e-05, + "loss": 0.05623414993286133, + "mean_token_accuracy": 0.9798879814147949, + "num_tokens": 8082999.0, + "step": 3150 + }, + { + "entropy": 0.10384491421282291, + "epoch": 8.556894243641231, + "grad_norm": 0.1440686285495758, + "learning_rate": 1.8736303295057585e-05, + "loss": 0.05598822593688965, + "mean_token_accuracy": 0.9789952409267425, + "num_tokens": 8214329.0, + "step": 3200 + }, + { + "entropy": 0.1028516049310565, + "epoch": 8.690763052208835, + "grad_norm": 0.07812497019767761, + "learning_rate": 1.5491214355603893e-05, + "loss": 0.05675966739654541, + "mean_token_accuracy": 0.9799879723787308, + "num_tokens": 8345134.0, + "step": 3250 + }, + { + "entropy": 0.10926607897505164, + "epoch": 8.824631860776439, + "grad_norm": 0.09217273443937302, + "learning_rate": 1.2539304690806058e-05, + "loss": 0.05895336627960205, + "mean_token_accuracy": 0.9787288227677345, + "num_tokens": 8470788.0, + "step": 3300 + }, + { + "entropy": 0.10923314603045582, + "epoch": 8.958500669344042, + "grad_norm": 0.09998416155576706, + "learning_rate": 9.88700170729155e-06, + "loss": 0.05717726707458496, + "mean_token_accuracy": 0.9784420201182366, + "num_tokens": 8599154.0, + "step": 3350 + }, + { + "epoch": 9.0, + "eval_entropy": 0.18996331751346587, + "eval_loss": 1.0452677011489868, + "eval_mean_token_accuracy": 0.8471912437677384, + "eval_num_tokens": 8637687.0, + "eval_runtime": 52.1069, + "eval_samples_per_second": 30.687, + "eval_steps_per_second": 3.838, + "step": 3366 + }, + { + "entropy": 0.1144553649079318, + "epoch": 9.09103078982597, + "grad_norm": 0.13222968578338623, + "learning_rate": 7.540080456369616e-06, + "loss": 0.05750402450561523, + "mean_token_accuracy": 0.9785326710253051, + "num_tokens": 8718657.0, + "step": 3400 + }, + { + "entropy": 0.10517731338739394, + "epoch": 9.224899598393574, + "grad_norm": 0.12909802794456482, + "learning_rate": 5.503651059593867e-06, + "loss": 0.05584810256958008, + "mean_token_accuracy": 0.9796554574370384, + "num_tokens": 8841327.0, + "step": 3450 + }, + { + "entropy": 0.10559520876035094, + "epoch": 9.358768406961179, + "grad_norm": 0.12645494937896729, + "learning_rate": 3.7821475821247533e-06, + "loss": 0.054477386474609375, + "mean_token_accuracy": 0.9798963716626168, + "num_tokens": 8968182.0, + "step": 3500 + }, + { + "entropy": 0.09716267803683877, + "epoch": 9.492637215528783, + "grad_norm": 0.08730494230985641, + "learning_rate": 2.3793183781182985e-06, + "loss": 0.05149771690368652, + "mean_token_accuracy": 0.9808312100172043, + "num_tokens": 9103318.0, + "step": 3550 + }, + { + "entropy": 0.10383410470560193, + "epoch": 9.626506024096386, + "grad_norm": 0.13881422579288483, + "learning_rate": 1.2982179291633611e-06, + "loss": 0.053290209770202636, + "mean_token_accuracy": 0.9801876321434975, + "num_tokens": 9233233.0, + "step": 3600 + }, + { + "entropy": 0.10055916175246239, + "epoch": 9.76037483266399, + "grad_norm": 0.13967634737491608, + "learning_rate": 5.412001935377276e-07, + "loss": 0.052958621978759765, + "mean_token_accuracy": 0.9807366824150086, + "num_tokens": 9362807.0, + "step": 3650 + }, + { + "entropy": 0.09396021397784353, + "epoch": 9.894243641231594, + "grad_norm": 0.12610477209091187, + "learning_rate": 1.0991348076424971e-07, + "loss": 0.05042768955230713, + "mean_token_accuracy": 0.9817433747649192, + "num_tokens": 9500858.0, + "step": 3700 + }, + { + "epoch": 10.0, + "eval_entropy": 0.18343592882156373, + "eval_loss": 1.0876376628875732, + "eval_mean_token_accuracy": 0.8472404077649116, + "eval_num_tokens": 9597430.0, + "eval_runtime": 52.117, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.838, + "step": 3740 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.25984195502038e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/README.md b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab1c84818e69f811f16f60142e1cc06e6613121 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/README.md @@ -0,0 +1,209 @@ +--- +base_model: Qwen/Qwen3-4B-Base +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen3-4B-Base +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/adapter_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f13b19332fae013122e1916eff44b061e2e20 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen3-4B-Base", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.0017462467225381586, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/chat_template.jinja b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..699ff8df401fe4788525e9c1f9b86a99eadd6230 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/chat_template.jinja @@ -0,0 +1,85 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = message.content.split('')[-1].lstrip('\n') %} + {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/tokenizer_config.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c960ecf0d33fd7b8c99d12680c0e74a82b36d446 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/trainer_state.json b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..390958e4e27047f3ffdfb8331e04446ac50c9db5 --- /dev/null +++ b/DBCA_code_Swedish/Qwen3-4B-Base_code_features_structural_train_code_features_structural_test1/checkpoint-748/trainer_state.json @@ -0,0 +1,196 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 748, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.6813136652112006, + "epoch": 0.13386880856760375, + "grad_norm": 1.0424015522003174, + "learning_rate": 3.9341343922025036e-05, + "loss": 1.5550880432128906, + "mean_token_accuracy": 0.6842062160372734, + "num_tokens": 127704.0, + "step": 50 + }, + { + "entropy": 0.6852626533806324, + "epoch": 0.2677376171352075, + "grad_norm": 0.8162124156951904, + "learning_rate": 7.948557241388732e-05, + "loss": 0.655595932006836, + "mean_token_accuracy": 0.8148700225353241, + "num_tokens": 256077.0, + "step": 100 + }, + { + "entropy": 0.6078765162825585, + "epoch": 0.40160642570281124, + "grad_norm": 0.5338373184204102, + "learning_rate": 0.00011962980090574959, + "loss": 0.5743931198120117, + "mean_token_accuracy": 0.8384771513938903, + "num_tokens": 387735.0, + "step": 150 + }, + { + "entropy": 0.572347212433815, + "epoch": 0.535475234270415, + "grad_norm": 0.438870906829834, + "learning_rate": 0.00015977402939761187, + "loss": 0.5378407287597656, + "mean_token_accuracy": 0.8479666405916214, + "num_tokens": 522202.0, + "step": 200 + }, + { + "entropy": 0.5688656893372536, + "epoch": 0.6693440428380187, + "grad_norm": 0.45694512128829956, + "learning_rate": 0.00019991825788947417, + "loss": 0.5383724975585937, + "mean_token_accuracy": 0.8476061511039734, + "num_tokens": 648663.0, + "step": 250 + }, + { + "entropy": 0.5601760675013066, + "epoch": 0.8032128514056225, + "grad_norm": 0.4569605886936188, + "learning_rate": 0.00024006248638133644, + "loss": 0.524285774230957, + "mean_token_accuracy": 0.8504812774062157, + "num_tokens": 778245.0, + "step": 300 + }, + { + "entropy": 0.556607717871666, + "epoch": 0.9370816599732262, + "grad_norm": 0.8242377042770386, + "learning_rate": 0.0002802067148731987, + "loss": 0.5218722915649414, + "mean_token_accuracy": 0.850643849670887, + "num_tokens": 905328.0, + "step": 350 + }, + { + "epoch": 1.0, + "eval_entropy": 0.6225093650817871, + "eval_loss": 0.603208601474762, + "eval_mean_token_accuracy": 0.8343255198001862, + "eval_num_tokens": 959743.0, + "eval_runtime": 52.3961, + "eval_samples_per_second": 30.518, + "eval_steps_per_second": 3.817, + "step": 374 + }, + { + "entropy": 0.5630604237920106, + "epoch": 1.069611780455154, + "grad_norm": 0.7640048861503601, + "learning_rate": 0.00030023795987949866, + "loss": 0.5240223693847657, + "mean_token_accuracy": 0.8503808174470459, + "num_tokens": 1022581.0, + "step": 400 + }, + { + "entropy": 0.5431726336479187, + "epoch": 1.2034805890227578, + "grad_norm": 0.43694695830345154, + "learning_rate": 0.0002999111394498373, + "loss": 0.5033904266357422, + "mean_token_accuracy": 0.8546582865715027, + "num_tokens": 1151430.0, + "step": 450 + }, + { + "entropy": 0.539291135519743, + "epoch": 1.3373493975903614, + "grad_norm": 1.0249971151351929, + "learning_rate": 0.0002992582102002974, + "loss": 0.49736488342285157, + "mean_token_accuracy": 0.8569463074207306, + "num_tokens": 1282481.0, + "step": 500 + }, + { + "entropy": 0.5289052908122539, + "epoch": 1.4712182061579653, + "grad_norm": 0.862391471862793, + "learning_rate": 0.0002982805938010052, + "loss": 0.49582687377929685, + "mean_token_accuracy": 0.8568812811374664, + "num_tokens": 1414166.0, + "step": 550 + }, + { + "entropy": 0.532991835474968, + "epoch": 1.605087014725569, + "grad_norm": 0.44117477536201477, + "learning_rate": 0.000296980418886925, + "loss": 0.4954973220825195, + "mean_token_accuracy": 0.8582441991567612, + "num_tokens": 1547902.0, + "step": 600 + }, + { + "entropy": 0.5220623269677163, + "epoch": 1.7389558232931726, + "grad_norm": 0.42622247338294983, + "learning_rate": 0.00029536051642302825, + "loss": 0.4832091522216797, + "mean_token_accuracy": 0.8612468218803406, + "num_tokens": 1678635.0, + "step": 650 + }, + { + "entropy": 0.5149065843224525, + "epoch": 1.8728246318607764, + "grad_norm": 0.5336588621139526, + "learning_rate": 0.0002934244135402283, + "loss": 0.48042800903320315, + "mean_token_accuracy": 0.8626475504040718, + "num_tokens": 1803328.0, + "step": 700 + }, + { + "epoch": 2.0, + "eval_entropy": 0.5772490365803242, + "eval_loss": 0.5702283978462219, + "eval_mean_token_accuracy": 0.8345756351947784, + "eval_num_tokens": 1919486.0, + "eval_runtime": 52.1598, + "eval_samples_per_second": 30.656, + "eval_steps_per_second": 3.834, + "step": 748 + } + ], + "logging_steps": 50, + "max_steps": 3740, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.50104158066135e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1040/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1060/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1080/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/tokenizer.json b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1120/training_args.bin b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..927d1897323eb9864304070a375f571c8508a906 --- /dev/null +++ b/overgeneralisation_original_Estonian/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-1120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3956e36264b3e2b685ce8cabaf4b724f77cd33f9d97540dc391de0e5f9bb899 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b17265517291c9159c968967904e66fd8150e12c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f25362f5ac2b250268e2dee14f1feed752672ed8dd666b00907ac0c31fea6a3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39bbb14b052ebf03a8be3a926c0f4ea7f485f294 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81727e1c7e5ff3075ba18ab0cc71d27e2b57db0334880028619d1c7d3e4a4c88 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b356159b77e53d0eba26e0afd1d150f0c5041e9 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79c19a2c88bbad07986ec9bbd02239db235ffa4286ebc3880439d0ed908e855 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdc722069f781accc8de085b856fe220a6b680ac --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2909996435a385c27c9566f715ab25fd25e2c1fb68026e71e9a305a043127cc7 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1040/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2941bbd3d3c4e65e176f7223789229ac7f63e669 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bfd9720ba1ccb651f70d5f3d829db6240dbcac0f4fd323f1085c4769ac3737a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1060/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ef6433b8a34f71f7d4abc699c6f3cc639f1f2ce --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4401308c1f08eb8bc3e1dea1a33db0d61a6207c0d6a5066ad0b4f7f29436d9a8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1080/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32203adf28e2ba4c83d8908b36efa314f5a49c7b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb822e40231283686f629ac44767f980c9fe4c77f30d2dfe8e1192f1d6db542 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce84b334eece867837585fa96ac047c75e6093ca --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28efc7d7347342e89cb1baef2fb3ffb53b7de809711d405855d05c77e24730ad +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63d2d945f9a71eae213559e319f502d09e292244 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9129393cd68f37a28c7e1461bcc87a119e5897048a7d5aa985a177ca6dc6b28c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84b38668f659285229617dfa7a6b5c72db410405 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2550690f4cf0fdea8be2c83ece7b4371b3e18e5769eb26aef8af2954894392 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af07ee88fa18c0d0c412b6c08737086b27502c67 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36f2fa04af1da264b91443eee2a3d64f010f093cd77d225cb0d00b4904c92c9c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaadda3bc00a9426e9bd44f19f8834fdaa556941 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64cb16a850656db52579b7f3cba5f6df133f85aaa8d708523dc4f9205e5a85a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5532a64746fbe5caf78551b3b82ead31fed08f79 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2732c23d93809923cffde90bc4a7bf83792953c9c2cb97bad68a71dab4d18168 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5c3da918bd5282e0d496a0044f8786a5b6f5fc3 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501ef4129ac35200aac2c53845f4e56caf3d36058f9cf563d2e4670ec3f95771 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3419ce363e7f3236ef991663959e3865a09a3c59 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f5e7eab207bb4256322d955933523413443a391ca9c0ef8c2f26605fdaa09c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7abae6343d4ee214ec65234269c7065a9cce4393 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a06c5811ea4db4a39ccecb268b46ab5eca7c2a87fe989afa15459504512610e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83a5108140779d0df3345a6f59f2489e5cbe94c0 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c699db797639b3553c9698d34d1423d662ebf2e4d80f1d18df1aa9cf0a1c3b57 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c458d7704980040f21e63897585e23fe454fc26d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdcfd3580fa3a8ba0fee24a1ce67d483f67220e9201c22e9c32c2adedec16f8e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1b4a86c668da1c721b91d2b5fdca6a749f8c2ff --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd926409c4601191d50c2d3e8263e932c7b962000525e2cdd33898b19ee98a56 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2bdee038497a809e99e41f01acda783082694cd --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f7e8d8f9db7368b96d0191aa5a6f287b8ba4f017435b68883c2cce6d63527a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4791809006c055511dafd246fc301335ff9a174d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b71863e3c997445f9f1428e3d57345f2e3300eac07d4c3a463ba4fcb098ab2 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83301de64119b9ec89f0017921637cf3d24f9e7a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30ef2e1b0dfff714a8ee8a4cea7cadefe88f89156d4949f2538167cadcc147a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7c6c5b79e03786f19830d13378f300dc97b4e97 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bea301da39d10453cc385bdcdb8f66a21c69522f4cad28838d8bd95b21bef5c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9596d6c07d3ccaec7b67b139fb3d1ae607f24ace --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68885be54ca846b2289fa730fe8149ddb14e5f95ef7af6ac2d5998e61c6653ff +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e5b9f845363672527e52a7b5f834efd4777bb72 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e155e7cb4b7736a7d122b4c295cb282b0ba48de88d12b74c8d2c734419a6842 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fd0ce5e155e19f90df29f84a96b498627e07bbf --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992d477125f1797e1fe0e8eacc9afb265e1df2533857e8f65b6ce69865c113d1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1440/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35a857a06d5b06bb8288f2c03a96939aaa81d583 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9293c457efed2e7f876e34bed9555e9e7869766952e967534e4bc634355db0a5 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1460/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21f396adb30c05c262e17b7f173f262bbe200a8a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4e7754edc15b8be6d57aaa7516b97dda930ccc384718886b01783a739efe64d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03e4fcc0748d6599f98f3010f04d621093402cbb --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cba7ac70767637ed1641b1349d76ed7683120f0de5374787d4ade56b4ec329 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a251da29c887a2027012008968e1e00312cae50a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d97a1c5341edab7518dc6c72c7254bc8f0154f0c015a81e2e3fcd69130ad8d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1520/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e190c236be589864a85b25ebd1da654d0880f525 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac21e60289f669d4b95a17872dd0d76ff43385c01addd37f1364961eca553ab +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2a95b69d7621a12ffec1002d2572d9a7c5d4b59 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8641f5b6bc6a296749980fe9266829a1820745825a99564b993f408a9c89cc +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87fb21dfa8ab877438122e8cfee4d3e99c10d511 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5c98750442a8c4367d2c70c092b5b86196e3644e7efbdd80bf2ed050a770f0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1580/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf71cedc26dbed536538b50bac31b4db0f032e42 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac23a772b6fff61a53091e2ad7b69d876cb562ad4fde40d9756697849f74ea60 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c3934a7446ef754e84ba6ebfbbbe154738de5c4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9634249bc93a11b3f252b95e4b261ea3b57f6e39ed893e98dc0d2793ba08c542 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4a9f12a80bd205ccd58ff76714b69dcac7d7bba --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6e875f69d59fe76bb0fb017a677029a6305e6e077b5506d026428843a214f3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1620/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21bf1fdf1bdde92620bfb93c449d86c473368865 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c1f58572cea5d028ef58033f2d911a1d3ccdf892a47081d8b503bfb6c7ac2b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebaf829c66bb5b737589e6f0d181272dbde14acc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5b981d263ea1ea5fe8129e6e8eba2bd8960fd451db061c11eb46fa670e7407 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af2a1275175200da12b4ac45cc83fd9a15d0bf4b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd34654d0632303db83b0f19e6aae9d9505679a888cf8714da1a930bfc176721 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43851586f1e7529d31fb3d8f00c50a3cc294831c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c779dafa7a3be251d44cb8fe2aeaa34d435c9e6da371cd4d2d8ce44f7f4bb26c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e03af273ab5f35969df267afbf61a4aae872d3bc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a333da164cbd227382319ba5d0ba4ee97d53fd4cc2c328b5ad1c74bcbb4fd847 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bba3debd60a4c43f2b848ea10aab4dc64587c7e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acefd750a6a3a7195575264f185f0bfdf76ccad13cbddcf70598f8d290cd0568 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1740/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78b95e0d6b932071b6204cde1bab08dfa6a18b14 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca150075ff3f35413f46c89d384cb8d3c00a08da1d3fcb67a6b606b00758fba0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1760/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb1fb457cac5f7734a48005bc5f76d8c41404400 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e331a49b9ce95f6976384b5754b1f475c3f01e71e6198492093031c814353cc3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1095f391819d59e612a262b09088d71e4e3f3e7 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb06b851158622a0e1426e790f00283ca4bd69dd3601e490c405a61c8b5b1ef +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..466832e52a52f3d52c9470d5b205338578df1127 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a647faaa79cb56c196355a46b168dfda6a31c2039c02260797cfb3756304d27a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4dc2d5680c36299e51bdba6f2851ced8c9c77a2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882c311d5e95e3c4ef7f5ef4d032d4f9ee914420a5e1dad8b4056dd8de6a307f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66012f6119240f8130f764b8ccf515afa648d629 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e52c4dd2408907f0924ac166c45092e89ef6eee5daf1e286a9f05f19633fca +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c4161a9198a866844d959b3a900bb937e461306 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b972c47bc263c97e6a78dd6d0c5793e22d0f28cadbbe8ca53fb05cf8c5b36b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc6fbd11b62120a2eaf56fb6a815ba8f2d3c5719 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ffd167b194b90dcd1bf5bc57f3140b7e5d449688ca483e89cbc09c9b1973ad8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1880/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a6abfda38fee0079c935a61b2ce7d274da42dbe --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88c800ef9e3c82f43fed4f16aee7d78ec5fe9ea398c275a6d4c345c8a3b34a1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5d47dcbe127834a7bdfc9d289e0c585632d1af8 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5fab843733affea8aabf20a1aa110fa93449216596f33bb356be34469b86c73 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1920/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b117b4537a34bfbe8146d0e633cb106f4f18ebbc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7421d22b4b2e3bb8bd60f6dce4c506095e11d1abfa115cd6ccc89e94282a9e70 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1940/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f80d70c456581f2430233440d17b699b7651303 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63cd52b2950ed77b79ece00d9bc5e1396ed771d3c523a474211d8e6fb25e9a5f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c3c76b3f57bfb84a5b01b1926e26f12e7c2936b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fb10e72a1d56cce796a46de7ca842a6c80d27239403bec00a7fa0f4f4feaff +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-1980/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b339830cdf94efbe7e1c2a897ceec8acde8b6ab --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e526633d27e1ccf71dffdca7c20169f705643db1cecad466668f339c47fa1ed +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee36db5fedc66969daa69c991aae7c1a538bf049 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50f06f4762d3cb48a3978fb02fe221d124eae1e7d2cab967f5ac55b1367604d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b08f18fb1e0e8307f4be9e77c87b6fbe9c8545b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b9b9c24692bf7c5e59c63d96723e21ed82e1d08f0fb0c8c576952469f06625 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21401159f54cddbacbd7f87f8f43c3d13c49f211 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce833fba0b068ccb2aac416b1b22df34eb2a55e8eae2669634071ed3c3e6656 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f0b8f23106f25fd3dd544bc9796ae621f227d20 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68dc7e587eaee7a9b558b134d95e2247d0f8e8c57dffaca9f7253f2a605ad89e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2040/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64c481aa5ea336199ee947aa542e4a620b96fe74 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a9e0bf8326c734541ad748ec4ab519c472976045ea3616029ba9e1c7adb3a7 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2060/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e96ea4d7d0a1f68dceb35744ecd03ab66dcb5f58 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be51bdbe738ab329338c8c00de21817c712a1939d62a90c367c7329851428fb +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2080/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c748c93d9223fac9cf341ecc55e374239f285932 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603255125807ae6d6c3ed10afb670b820672f74701a80a62d35c838da13a57b4 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96af605b5942fc08084dba36e761ad76409db733 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6d8b83ababee94d25b3e267ea17993b4cbb6cbe36fa42d7fd0443a8929d09d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3ca030ed55dfeb5f001de12190d94c2f496bbb9 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c4083c63d9f1e7a7b3f326d4224205fe9f066297edc11f5690d600d06fccd6 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77ad15d03dcb0b3c607b2b06308f5694ef46f97e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27bd3f00e0239bfc6849156fcb13f2d39406ae99a0fc4c5b3e8cb9f2ad0aaaa8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..990cd9d92410a39ea4f6891928fe192ff6659f79 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9bbe0721399d1fa0514e25bcfbc5f25e6bbbb12e840968a4aa2c97769d49f0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1abff11c71a8d9f5d33bcdd3dfe8ce0df909ac4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9375d965c313dd5b571cf5935c52353179709a308e03b09975a1717c0fe97370 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..743d357c883bce586007275da72d4d77cef4bd36 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baed1b46f8d53b866e26ae42a93c9e4aae0fcde555ac2072e9537e477eaa5a46 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1c48605e674d839e976b33f99ffef029b9abcc0 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f513471fca7f31ef36f0e1b49f036080e3f816444eec7d28cacd65d7e62d8570 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9505b62299923b1e294606941a29c2a32c8b8397 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3704c5d293dc7fc36ba98735617ad0ce9e937f2bfd42c80e03cc5558ff80a7db +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8142c4caf4d2c2c79d803df44ca7e751b63fb82 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76215dd1f981b274a4cc213d67ef7a9d77c2ac128092e18273513424c62c0180 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b667632b8221ad07e07a1dc5f505bf8895c2fe5d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a2234ee111d4d8fa10e42bcafefbdebfe5b09f49a956bb75467113e7792d69 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c97bb94d0882a2e0c4d648128e9dcd32cd8589cf --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc29207f45e50539eaa73dcd83b69a31dfe4dc6be14638c23e210ed06dda0a30 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fedc468fe7232d20887d58d1d6509047efeae52 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc2e1f3d66d25cc35d6055850d310fca89dd280d3aca9dc3bb606615b4dd1ad +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6e0a58dfc2938122c7b637cfb5dbb96ae2189c7 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52110fe1e9ef79bb3efffce273c402482e3fa541eed4f5ca04025102f1ec072 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7868577b30ff895a0fea9f5477f63b138ea4e525 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5aa7543e161f8fca39ef29c80cf50b24d6b2bca6b37f4031dff306c409f021a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5275aa39c7eaa51914a86cfd6da1ed2ba10f555e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85f2d177513e667f7474b9ec6035e25071157a4697a6094166a8c3636a6fde1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64075a0d1df9b9dbbd2c9c3ed67ea8a1b6d824c3 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a104f4963093045c827ba8af182aa34139f8969c3d243daba55b3052cfa8f0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..317f8abc0716a47ed58ea7ab77b735fc180b5631 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef6fe5f0b3cf9a702f829f27902c15fc1a7daa861216764e6d6bbb787e0e0c9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16bdbcd2e29656ea53472badd0db89bb5cf7b503 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6423ca9f7534af3ab02805a36511a932f08ec37c7224c222ee8af8cee7a907a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1da8d5057788788e4acd337ac7585dc3d879f835 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d836422b07e8bda9a8ec3399eda77035427ce17bc2f62d6a58b573fe6a573a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2440/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3899f4fccaf0e6a182430270af14397656891946 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e054d6a52b2981eab8d7ab9768b3cf3b8a914c5405494f7cbe333d8d4af34716 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2460/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60a14f42852c5b05195adb75a26f20036602775b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb206d45076bab7e3554f50863c6f9996748d4cb87228a6b3d056e2a7a647d0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9da8d38f327e952b7c879f16a10d6291c476db5e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb849e6c405b2dd8f618365c7d2d3feabf6a34afe9cf780efd7f0ed6ef55cf3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..880d890151955056a23a7f73d08367cc1a118b4a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95f61c7103ff653d46ddba0f9d405fa6f56ff6c4663a398d595dd75aadc2178 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2520/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..262925f250ec548bcfa4ec7507fc10038a7020da --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7890583572c18278c5b963421624c076e099783cb14b1775d6ff6bf20bd9dbf7 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7439b13a9c969010addf804a8c5a7056bd849dd2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9295b2e59e0a3649efa0e225ba34873f35595aeb2c14ecc551cd0533417dfa9a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a981de067af4c80ea9d5a855b23f57aee966933 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:005186d073c82a48a47fe4d95a5e673e2b4156075909b018cf3d0fbf618094e7 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2580/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c649c06ca5a3e559af4cf2dbf78e4a58a38ecce --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7835057a81b8065c533637fdf61cf9328ddc25e16bf488bcadc21a35cef0ed +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74919e758a5be1b9c42a0a0ffdc9c1280d21db2d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91be177121f734eadc94055744d6cd4f978c5bf378197324357cde7659e6b9b8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..595ce5a5531bd9085cbb64acc30fcc18c57330a3 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d497838763dbd3bb7667b7e08772cea4fe4f397270abd25dad6e2225c90444 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2620/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66bab861e102c1b69e280aaed72f097037e9ea50 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c64ff21ac71d8ec45e7c85b511a021ba0fd430c8c9689e0e01c61f5bd20dbb +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c6530b48c933891f8af54e5f74b557a618ba3fe --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5456af2b546eac6920d287aa6172517385d8dece1723871c052aa8a5eb62d55e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..798652d039d1dd4f98b13344970736bddf31a9b4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d44bddcbbbf9a938cd18f28c5a3d2de8b0c1adb305b02432a66f63ca3ab57d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8188a5d2d8318c18cdd3fb69b487b8be6a8a47a1 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b22d5a7af4ead3db0bec6bb125633d20e71b0a795fe322177b1d15e4f1f0360 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ec78314192133d91feeddd97c93163b3b89b202 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218a741d10ae3adeb9327d9abb610a4be23f4dff1c32253833f1ef57501275e4 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da7c78cf7a69fa97549d5c7880021943c6157d12 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102dab7ba52137a2d54c64fb817e1c6ffb3ba8a31d0253607b52416fe6204f66 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2740/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..409a4179ace8ef5cf02f55c121f60f7334fc39a7 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d092eacc8c6d0c99aeb3bab7f93d3ce336a266671ad1e9fe208f2ca1634cb5 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2760/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3250b5cd81c1da581e9f421b3e3e10e55b44197c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd579634ba4196af030f007fe67d7159a1e376188a676bc5abdd5403c451e584 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d040191c507fd496e99aa9a773971de7b6019348 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1565675c2cf562d46c4eadda8c33a10eedcd0995f33b4c2e3768ea886953bae +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67aaee211ee6a0654d9e268b1b35d37a4504afc5 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:574638bdce0aba70160378ecacf851028456acc00ceed8b6c5bf9b70fa9ad83d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78deb7ed95878483a0aa0296d5723f48645a5d82 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a086bb07fd9157d21f29bdc79e6a2b1631697ae2861e338327bb7031e8cf2fb +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b62f452841a80b7a32e7bdfa02e6b78400ca2b22 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f287ed42b33b0bf58cec3b5bd262489cebc38ec6f2671d0de36dc96ab457d34 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02b7f0a8fc2c405d5f7de87885a010c3a429fd47 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3983e548c3dc292cdc81e9d7b0d2f8bc585ef15426cab8fe32c786ae149fffda +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14964c683d4e89d75e587a32cc376e3c2b399e9d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abec46c444fbeb83fa6f38c510f8391057d6c5d5e3179c6b9114d78f5533b553 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2880/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78484088f28a407fdf47674e04ca88105aa9eb05 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:853c9acabb80919136d060577151691e68fa18ad9c7ccd7684c9339782cc9073 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7c90c43b6ae547836a6d024e08fbe313d6f1f34 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c843326ad72d8467d7d0841fcefa297fda9982ea7a5d2426085f27faf399de2c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2920/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..027efc5a233e7a20a3e81b4af6953a54c17c4bf5 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d476917bddbc763763f384af3096d995804961e3309f4fb34038ea6af75a91b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2940/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49acad31b4adb28d394fa17f2cbc30d1bf26eefa --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2fbaf2df743911dc91ecf9f82f98f5a1574b94891e33ccfdf9e9a44d38a8bc +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..964f2b5bf03ba7c9afb02a020939cec1fe0d3494 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9206c33f37600b4078b7c0fbd4131560d9fb62f1dff39fa9192f649b24d321 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-2980/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab99efc6b6e5ccc5283ddee4409faac920735cd4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea545e58574f2b2ef326866e0934e853f4f3a82181824d71d7349df5c697f91 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e04627db0261d4a657a3ce7e1c993056d9c32e15 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db2c52d9af1d293b94d27b6b2d6086de044283dce84f879091a6b12898e7d03 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6cf9bb058d31e428456ea23ea15febcebcfa6b31 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88f65cf51fac71fa74263493c2b81fb1bce1ccc6f516feed2c9b7ed5172041e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c1fd68ee8cb85c1c0485aaeac99f763b77da4e4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e7d173b1b0b1457a8ba752e1d1c4f341ea5b19e413f695c500097b72a3bf7a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3040/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fd81519a0f802cfab016340b0e2826759466250 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7cc06c51ccafdd6d2d51b58cd27216991c4656592e4fb6b4cf695c0796528d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3060/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f03d791f1731e5fc2e13bdfbfb4b8472d835cb2d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed905dee57c89416dcb82f1588ce8a4bf24fa665119dd7a4bf63700f8738b86 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3080/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9a0e7c3122648f644dffc87ff373e3a691e4c9d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a95a13ca8968c36f5ef654c28fc9cdd81a5c485e2f3c99546fbb414fc9ae0a1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7989d5e7593b67e9160915c7cbd9e3ff94582863 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4c338f370a496a3e239cd7d27bd5bd00b45dd79e183ae22642fce67afda6bd +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..513eb0d5a234b49bea9db2aceb5d798870e4cba2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc0d93c6292a08bd5b59f54aaeff7246a4cae12aef2316e1f38a1e61bde103c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58e880a7bae1a74cd26fe54e2157c00c9eab4e58 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b7726476ef8f586270adc1b5aa28fa46f31f2bae3204de154b2215c1bb7a3e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbe3d855c0902b528a077e66d1b8598588bebd37 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:336dcfe859231a09cfd00908388ac10400fbd1b9e5b4edb83e13a922f1b7a22a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef743f35f61eae89d0ca799fe3bce8bb07ae7859 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41fa82eeab97a0dd298723831de2ca51131692516e8aad6b2a21f0593ef035a9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03f4f7d7c56d682774981a4bb521f60aaba2eb0a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd894976cdf670bc1569d1784189f2c7fae227cee639aca0a8031ab8f479a84 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c8c4c483776a39610b40074cfe5fd0ffa6fd859 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0952f2ba955058b34eabc26fe4e9b79c82c5af9fd46a3b0e0dc8f17d9bb7e3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d791bbac64d0f5f0d7fd2d21c9076fdd9110e2d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b534fed81f493b036c2a69b01842e67b5ef17b2710e0ef4f7652b7045ea828 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..251980635764e5160594a49268daf2318e6bc3ab --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae9a309fb798b3bf2d4a4df4f0a32c1c0eb1505901ae245ef60ed67267b697f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..191554c2b2fc511f319e0d2a75b3852baf154673 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1299eb270240c17e92381b00fb8a285b2df1d5f2f05b043e8c53531f9f29aed +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..482b6d2bc36f608eafc904997d8e065a80f15258 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7628eb60d861c8a1fb61311ace1e897e399d8ae54fb12f926af5fa50bfe841 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05b165f415156cfc07c0f86df1046ea3c6921198 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d4f0bb72936bea4672620a110afe17f013958067871275446d8fe08d7e1589 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e492ce087669e372aea5c7cca3c01e70c913585b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392da90fce599cd0eb41ad0eeecd83c101495353f92f587da3da951eace51f47 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc2c6657ea489bbaa84bf3b134c7463996c4d86f --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f611b6e58326b1d13e74a6cc56df61612a4c32bf9747fbcac69f6eb28886a63 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12cde3877f10e4cc13b1f902773b5c30aeafba82 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f4a4089b9ab04d6a3b59f25297b635c08f7c62dab964b79cea199efcbff8bf +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abe8d1fe2654def29942bca0e2bc9410361055ea --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d8fcd6842a227c44ac5cfda99e645f4d98915ad40d0cb0d98542c9653f76b6 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e05bd18bd8440bdec7f0f2b23061afcdd9f1c33 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6853d32e26e3e4a46ffb523b2beac96dd723d4649980f5307e50af25b7944a51 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d8c2956b0bba9ec14b95436db2c5787c80eee92 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944b6252e4de704472fcb8e56f773eff1527e200fe599cccf37bbfd88b824fd4 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fae5bb16b61de74845df1ae6cf541babe0aca13e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:366f9073aefb9c7a1212cea094476fb6d3e6ebf9ae7a9ad9c2d01a3de3b63113 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3440/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48035f5093041fbbfa63f9d3058e13998a3c64ee --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b92620b6526c0037a8f31ad87d2b74579fb3a36aa3534ee13b6f141e3242017 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3460/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79841b76767648380a099298ee193c0e5974507c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44400b5b9678e8e5688be31b1e3a46b2f28e80375a46276775f30cf34da989f3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..007ae309104108ac251df4d0693ac97a502e7a03 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccd9b1a43410c937a94ce0ebd6397785ea1d85874007c654df66c6fe49d161a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c388c7f51d83db7b462fb782bc83b519de282f8f --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32bbbdeab152517a77e1f70b63b936bba1733d1a7c468db49feee599a401e4f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3520/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..584f7c9078d972a60e6ea04de3119cb6627ca9bf --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2d13d04c10c2cd0fadfebf8feef1b3f2d43165eec98b8a7389d8ec529d45d2d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0266c18527dd98fa81f1677f038e3e8dac669969 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef5bc3782645906275b6dd064577187af44789c5b5b6d82f73218fc400c8ddf +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abdbc5bc5bcd82e44cae60bf57fa6745b5ab299f --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62cbf8b5a3f8bdff06cf63491329bab6ad2f16458d0f5d1a197115d88ab48c2a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3580/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca4a95649d1380ee26a1e3344c9b40e5fdc60303 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a3d5a1508ffd1b6b5253ecbe3a4546ed4b30d788d40a6b40a01c0450588523 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf7eda0124c04689926f010229183d975caeb18b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2dfd1bb1e7b55c96488be88c35ec39446615e84d376437fbdd5042662bf196c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d060a47a42d64875258fbf156d7452808e002535 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e43fc78c372257c2da266954b5934317808f14988503ac3bf5df6657c0e029 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3620/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d715558b94d804abf6adb9c06732be7378a0649 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8db65daf1f1c88955b87f47c102e3bb788da52ad13c1c03a92719131107e7ce +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd57c4e2374614aceb734f41def645da48cba178 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243b1d6608c219ad361c323e7fc442023d2e3193cc957d0e496e398b2e55014c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4be881f8b97a65873aea8849fa5f5da0a9c6b920 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d9302af5ab81f6ed4932a66e12a18c2730f2f886c34865665faf589c7a7be1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..793eb0305b7b8b2ff9433aa61b72309efccd9f6b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03fad1d723a7aa110ec15bbbc8763651d48dbe5f0b6ce571e84b12b1b424ff4c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3215997c215f8021c9762dd70c1821cbd1bfbfe8 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7301fd0b9f2da349cdc62fab64f421a01d4e4f6f221f999cc71fca8e61130fc +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a389f0d8a7bc85bb70484cbd264d2836f9480858 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1113fc56b435447b0ae4249d0637f7a3791b756ab18745716c53809426f665f2 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3740/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1feea567b3d596305c77907d82d10b296001a76b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e69c3ee403e3f1f913cb7eb3d5751dd99abea7b4beec46ca4b3bae14f8f90f1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3760/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..988fa4f495b38fc0356b3f28ebc416bfa82a1da0 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b743bbeaffa1e6f99aa9afedcf7f384ad9748d2e7494fe1c9b2f9d4da2164c3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83d766c8ef44e63e756c4c5a4331992efb461ffc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ca688fe3a8c464d613c57dfe4011b5c244155158e7e5a2fd1c0620eac17110 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c763391cb44b403e12d39f8713ce5eeaafdfe47 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732f6395997c80cd952b21e75539a9cda23e1b787573820fdd51f4bde5b7c31b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..537d57cbc2d6cc6f01a5704f657b4e7fbf1f0687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b1945bfb2be782330455321decd58248c761fced54bee1b061a1f20ffe6fd8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30cc68e8033a4bf9367b5de48576ba7be5d34aa1 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4654f3ca9988c7b664d6ff099701dac94f76eb9180656d47cfea00342710cb2f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a1decb0b5d3f14ff36e02c9d28f2ca531b64313 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ce9635fdb7dac187d28163f01c3e5ba0d6a130e744d90f1bf3d72a911b9b1c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f49fc56e5e27f45841972a049a898c55423c0e4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9aef5e91ce33318f594bb1b0db8724e515816857a935ecec257945e584447a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3880/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1355627a682e2c854efb8afab56bab02b166e5a1 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644d4ffc44d6ab2f088a7d158bf0463fa4d181333f0fee8eec20466959bbb5a0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..430b742c9e8ae8a8e751c3fa6b4bbc4113964350 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e1374a98686fa0b803f8a6de026268664c8a2b872f5f81a209fee82d3c2715 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3920/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7667e54a770a1efa8feae5eb12e5ceb8262606b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:444a1fc108cd4766875ad47decb964e9735fe3fe8079035ff832d9733cc56a7d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3940/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eadfdc4bb0093cf4cb91d9faafadd71280d7b466 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0646afccbb7bf5c8dc90a01bdee2b770bb549ed8c9460a8c8909d3be7dfe5d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5c8daaa0fb333ecb37689e321b1fcd84eb3e57e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8362623adfbbde51bc45a99933f8e3aa32253607c297c6413a7768aaee5b2431 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-3980/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2b5258a1472ca6cd2aa4e5ba5652aa60089efa5 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306aab97429e12f5c7da4ce3141939d8f7db34896b09a65210e3f9024e400eca +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-4020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-440/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-460/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-520/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-580/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-620/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-740/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-760/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-880/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-920/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-940/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..61694ee0a0297be8f5aefc613f04d53a14f71687 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test1/checkpoint-980/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9bd0a1633c69d2064674a5e48c285e0bd9d1e5d790a75f692c196f2ccb935c +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2440/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2440/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c67ca410278d818e4ac50a9fa3835e04eb8843a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2440/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a44fda94f5db27c1499be55a4082c2c67885cbf003562144f68235c90a74e85 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2460/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2460/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08289ee4dd99f22ea0afd3289abb73a1f1d01e69 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2460/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18dc8827f27ac9ad7b27e297e8c1efad5ca46eef42865e0741107a900116b1c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2480/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be5ff4683c9c1c6b0f4f6135fd34cc6cc681fadf --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230d6590edb5f3077291b95b5beadf2711dad8200fdf064f18548b12e1fc5a71 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2500/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df3330053313f6477a64508b46cdc5cd4b40ade4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:326b2af64f1f6c828ee9739db1670b5afc7a4d101fea7144fa1eb1ef7ac3e3dc +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2520/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2520/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c5342a60596327de4b282dce0d0ba554a848ef --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2520/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f825c7650e651d150651c08334909b6005655d598a57bbf232722f9fedf75b7a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2540/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..701373dac6ea094751ab7906f8ce6b9a0f151983 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eea6a2511fb59a690541fcbeeedb4a641d59b0947baaabb465b089a3a39ce0d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2560/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0c9d420b5dcf36388dd5e2e640fecefadb98a0f --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f19822e231be4241c41e3d4480e0bfcc683a6ea31339c437311ac70e3eee63f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2580/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2580/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b158e9b8c6a24bd873f07593e9c3dd5906eb164e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2580/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf06d3796ef963732a65ced4841cf7ce5122a495b544cc0ab5fce86f5c49d63e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-260/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..064a88c91d5ee8701b15b2bade797841a069209d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589950d10764512cc25e2496f010a4868f6b7351fa09d54aeeed45fe58c84a64 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2600/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..994c0fb542d973cc3e388ac894dde69f75a5571e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6df97e71568fa338f0b473a7b15ac1bd9d5581c000490547181930478cf7529 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2620/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2620/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..123418b14e8a362e4ea82b76f6a61e4e116e26e7 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2620/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89a7bc7798b20b581a18035f7192135b9e3b0f9e383df1b4f71d48a7040bdf5 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2640/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3164b3032033c1736bb588db1cda48b388007fc6 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e1fb2ef79c0f5b192344feb9ddf950b55e7b6362ef52dc527d7f8894884152 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2660/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d532d37442c2ffe79044c067db7584e8c306745 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8847b4bfb48663051fa220ac920b9f9f9300d7113b6c12a5659d496d21eb1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2680/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3c3dee50bc08f1307afb26d8f00ed696c79a710 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd9b74b2e9e1e062229f6b1ce492ebfc1eb55ba8b12990a579690a5c692305b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2700/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c16ea9244222f1836235c51dd5b87ba1eabeed12 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cadc538433f9a9bdc29aa6c8f35851828c3d36235a447fc6bfdf9c74eebaf21d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2720/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c297e1ca61a9bb52b9f88a39f22ea025235048ff --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82742c787e8f98be6a79d592ab90c9c2958c84c4a23c1bb8c009f8f6b49cf368 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2740/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2740/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b40be24e62be9cd2eb6bccdccd30af05bf2714c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2740/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af744396de8ff50d3ca8fcf0dc6e8efdc6a2d865e126bd14e836703d95cb90d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2760/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2760/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d45c528aed1d124fe0ee7ac9337741403f1b3f43 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2760/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02f4b271856146af88efe01849fd1160da82581ffd231f4dff0c4a68fef8f63 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2780/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2780/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01938e6ece72a617c9ce0829484f19ad5a121e00 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2780/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d214fc869add6274cc4ebd4b216b3dc9ba3dc09c8abef4022755a4efa2a3b7 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-280/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3489182ead02286f9a176ee09ff889d4b8fa023e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27185e4ba2163a7988a19dbc23849668e69a28e4307cd0947bc29f1d873f1f9d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2800/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..723b60787bceda29195224f4b8333f1df7a8ce17 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22074f520c90afd6f914bd7c16bb5bd5e0fe04ab473c32c94e2075deeba5b57f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2820/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eda7573b829d25a6dcee17a4ec7da1d03e87cda8 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372c1d8c1b248e69f883eb3150398759f4a46d405677a1c3950d3996eeee5f5e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2840/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f636cce367d214c507837f949557809d0103cb9 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08aa2ce304e06a31dade22aa38a03449e3aa722ca315b0743ccb994d487eaae2 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2860/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a79f964f7247d930e9a442f0ddebac6021105bfd --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02cb495be9b85f701c741e77bd10cf37e5219e99a7d1a956e836d7d2243a4e62 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2880/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2880/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73a4f663991322f1de4bc479407ec48ee00cb200 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2880/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968030595199ce4c3756a2f9851a51264ffa7d61401ce061fc5db3e9e37e8fc8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2900/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22db1dbe3297e956ebb54e0628e187257e866001 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8e38cf53eedbc6a8da1b1edeacc1f7a857e0f439e07b0f46f8bf0fb5a34a5e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2920/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2920/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c19bbe05d8fe43334083cfb2cd036fd7bbfbc5b0 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2920/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fce4f29e97dcb2601098e2428460c6f4a054dd9c414a2ff428238047f0b7916 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2940/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2940/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdc3d9ce036e1a3c227e0af1c085ab5fa299a050 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2940/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b99d1f247d4b284b43dc77c8f468b6c8e3653382766f620d2f2527c1d84d51 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2960/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2960/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..339c35405bd155f00ed8da48801e015448001706 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2960/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23af7ebacab6e49ebd22914f1d050d96db69ea43519bb9e075be9f1440cef256 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2980/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2980/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..015831646b583198766cf323de663494d8c22ce4 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-2980/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015a2c65d0517e62fcd48e714e35501f153b4ba681f94db79ced8ae9c6883bc4 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-300/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..54143084eb880c14274568e52284045635156dfc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7b3be5e9c0cab34176023a5a08fb8fbc2cac2448622a0fd211858f6b7e4fd1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3000/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bce7ee8e3a2942832aa97db272f959a1a74551e2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe09ac56f90b935155a233778a2b680b3b402141e65786b67e19ade9072d7b6c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3020/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3020/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff5ad4af0b14e2141609bb738811605738d8977f --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3020/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23979d98cf84e4a44c122671d93e00459b0846512ff31498338ac1a1359ec8a3 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3040/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3040/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63c2f1b64c35b64dbb1cc4c1860682f708853d63 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3040/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9286dfaa8dcba9daaa481ff1c06e90aa1f764258f01e851f0468478ae4ba52 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3060/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3060/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b36df0840f92685624b64a714069f4c9ac72d1d9 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3060/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8bcb8a9a0195403e6c879caefab77bd4c97838540c826dac73733f4d9e41f8a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3080/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3080/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae33f0e417db8b040a6fbdbea7ee88e0de347ba2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3080/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204788542b490207953e040c9bef2c620afdf227066471b8b0227040165eb052 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3100/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c86a30ecb376b130297df5aececb1e8915a55c1 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5ff772ebb987d246a5de616083ab8e19ca24aac1e97d2a973578e9216dcbc1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3120/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db972da52fe6f5fd81525632ff1ac8eb0b4438d7 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecae75a6754e843cfb6d92f99c585472a58a073ab7bc06808688c1ec18322bb0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3140/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18fa82cae9601b791b4add00f5daf58607af9d83 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffb160e220591aa5e565574ee7a24604793b0a47b9c6ad54d3a7ddc3f9f59c1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3140/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3a71676f44cea058df59c0b17d815726167dbee --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6267a5286ee1ccfb96ffc510bf37f2019dc73c50f2072830fecc3ce763e757dd +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca2ed8183b45080b9ef81351072cda7146f0788d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47e81aa1790787ffc24e09cc5be14bb3e084eb2b9f0657bab8da79f51d5e00f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9ab733b55ef4637f2ba19da2b04e0fab0549f40 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f3163539a91687e476312db15fedafcf5e30836d2cc6be292d27fb70862236b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60a6aae549fec7825927e8306eb1ff9a7677073e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7628d327afc3cc58b1ee89e4845b7b279c856e3f7cefac3ff7c82ac3a83a4241 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0f88c0503bd6ee54e509b2a2f92a074df889aba --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd4796c0fce0851d79dbcb638f7a42a461aceea6c79445f2c2951652029ab80 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c41db867c9f610b5b7588095cfae21eb69c29a94 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe7a4eeb07221067cf9b96d590e31a24b37c64125689b8190752824427d649f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc2e2f31c51d27b80831a64567a42488be786207 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ff0b77f501c4bd3c301276ca81844b3a2cda306d2f8c69662f56e985e4a89e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d781144ef4c3d11d5abe929c156034a9dacd7768 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91bf78629d69c6e249907385c1a5255f8de52494b33b8aa3f3414628ffa3486 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dceb3ddc83108b31e7ad1c832338bb95cbf4d691 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab17842196f681ee87109b67a0582629960fbe2f0130c429c4ac87be9a99994 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c9c437895a002a5244243fb16e24dcc65924228 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612261e7936b26dcc4b76f6f9952897f068b76a6a892959cc365a090227b10cc +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdd7702e13ee8f45cd46a6358d2a2b62dd9b8633 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd884bd79ff9b48ab98edba2b7a7ae2331661178ccb94a575dc4209ee3f3219 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34bf15d0112231a9d7823423dd04aa069abd3f4e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b5a92bad7c8b489290be0861b98dade4648db0897a7bdf7b5d28ac12a834e9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3edd2c3eb7837ce68a9810804c4a5ca8eae9bdb6 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643a9f20535bbd23c394d368095e2493843d413eaed0081714be34980e6f35a0 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d157f552a2b443e5505761325ea9a497ec97ac26 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6513faae5b3a29752a5cccb1bcbc608df46176caca08ccd15bff2134526697 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e58345a6453849fb4f1af4cc54dda350d511870 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc37d269ea96bfd74791aa91042780db8b496772b68d43277abc4c27cb914ff9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fb607c7ab8237d444122bc19d301f969bdb9192 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c23224a03242bca9cd1fb832310e6ac781fafd97a80467653061ea269befdf1 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..920fcc6e0713f5d94a7d03ac1d20682b6f47d771 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c97b5ae2a6da7fa44a6ad860df7f5d17ae4002306e99829a8bb9e30c778fc8c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3440/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fab0f373b88d759e2b8637ea515be286a56f0d88 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70c5146f53660731870f6cb5d0026aa6b40fa7dce08b3b81c6837c696262d37 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3460/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da1834c29fb884edb1a669577dbd94d7a44f15aa --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d1a0a117ddcb46786fbfe67d8a80031daab772a33f30e16df20249b95e64fa +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c9baf409ab23014c7436f059fa3a0205aa1731e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f711ce38c9219094f4b943a374d72eac4781763100a1711977b063880e13682 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26cc6aa0d730bebaf1dfa1b492c3803d965343c2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e40e8ca3f6e9b76ace941bd43b70f42f8de0461251edd2672fb973d9769c24 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3520/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9c6ba412939ef1cb4405e5013744ae0391c66a3 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378e46f1114ae5e41018d746ab4b89f29d8a38ce7f6584288f9c112916753b39 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebad5c6883ebb8cc72459863611de2d1da28c47b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c09b6184ebee0dc4c3448b42ca9cc8e311e0a0a3df43b153ad363aea575fd2 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d347242c0839a08cabc67e65a4a576780a8e23f8 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3091fb88f0ce7fb610b7fbaae863dcb09afd808288000a6dd7d2201432e00789 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3580/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26ea11bddc345ed70ac4b17a4ea9d9d3b5c84914 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3707b9010e5ccfbe63483e7f754b2cafe711579f5b2acc9fa6e7b023a5da473 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec8d7751fe3dc8e72c78afdac9436ff9b8663e53 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0c2a88c0ca525f2bfcf42e6ed27803d223349c960b82a7c3684f49b7818dc5 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5cd25d3cf09c1db4cc77f034d4310d570f5e25db --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e11b66f35892182d07c5c12ad7586305f900b15df1c39920382a18d43501ac1d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3620/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6aff857b1aa736e1bb04cd44a83ce406c4f9aec2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18473efd3d2c5a89c42715178a40e120d08f2be3ae3f2ddb1d1d3086723c2aaf +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a1f7319cff913eac0d4945e37005afff243f927 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc1be30c4a6a946cdf2d0f54b6453615544daaaca82a5822526c12f11ba04f6d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..687312ecfd37c9b39082674c1d155d242ce35643 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60659334143c0d4b71f936ef17a29870c3ed256f9860ec60ff30e640ce904d21 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..137137f48796f55aa59ef9be9a5ef66ba8da07d1 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e87949082e4e5f23f3566bfcde22f2fb9cd8c1c4b0e946493346d60cec8a550 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bfce96be7844a48e31f12fff55271a83124f745 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068162b6e25adb550eee2790e2ecdcd2b8e376baa30b63c800f2f91fc8568467 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0480ae2118e118c68f537555a79986541d191fc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17f943b0d7c39a39e48ff15bb3a1ca9f5113ecffce9b31fb238f9e8b19baf3af +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3740/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4e44aa9a24b81ac369c74229072c6600ba400df --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e4002f69384581991fd32baa27c784b364d76d08256cce0d823acdda8e139a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3760/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8047a43b03da2aeb88b2752439c4d0f538b052bf --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582f0bf261567c824644f74b07612eabd9391e90178e776a04cfed5dca710ad5 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c52da44639bdcc6527550c8ce73dc2e26c046fe --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ed778d0ec6eb3908596adcc2e886223d627f96d498bc387bf6bf3e98b121ef +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e665b2cf0cb9e8dbe9a99fa7aba04fa2d61f9dd --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949ba016bfbc846848b291835973a45619cb2f6b82fe42f84476435ca8d98ac4 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ebbe30126be322780305ea226c0e1c8ae42607b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6c464cc32a97fc67a961b1a149eba3786ba1677ebe2ec1d28a2ade6ecd99d9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29cf9081752dc82cbc1051cb1a31a9cacbd0a56c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c6351f963ac2eef3343895c8c26313b94ebad4804eba14fdeb96a6464d5fff +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c7c00e5e2428643ba683ca0f11a50a15985e90a --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbd65b3986291879dc4dcf6027a24d291d24824f7405c6218ebeaf18004baf6d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..865fb53502fe8ae1c723449c11d9d51c62bb33ed --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75075b8a150823e4ca6fd7b0edd0e82990954e29d7a390f22d0013878f9e0d55 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3880/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..942774fa341dee1a6531d824fd3ec66a8c449603 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a083fff8f7283a7c2312bc58306694820aa6375569c6acfd398c0b22f5303f5d +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..510e56dddd6843eb4d33a9e87166462cd978e980 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8214ddeea9b4fc572efb1a2d2094cf6a41fc8ade9515774b4c940faa950451 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3920/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68520658efe3c7551e62e1b1773f825ee282a595 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7f708372203cf61a70d0ed1d982ef7ee3e93e0633e4c85de908686d1ef70d08 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3940/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d355c290664b9ee942d1c73aa322bc2185b97e45 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b456172bd23799f60c7bcda8c809370512d15b70defd0035a9f9038d57ea7ebd +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adef03a9a65f65cba26e5a0a2b0418cc338b7e43 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eaac6b3a726bd1041814cdaa2eb8a6054996a20cf4ec3ea4ffd508ee2d87f4f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-3980/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8dd1694d253c3e4f9558ec18dbbcf90525d02f6 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a4c59876166e292792315e2d43e7d3cbea8cfaefbfb253384fe62380499b53 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8045c3be41a15666a6aa3aeb5d1753f536306867 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247d9eea4e0f38117c3abfd8c167e0490e6bfa51be8639654094b9cf2d011e60 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05fa524efb74a712a4e02eb6f62ce5f256b9d73f --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668b4d321c7233b144f972687f346d6e103af2185267a00f84a59b178a6516d9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45b796b0da9fdd96d08cf2632d51fde52eb8419d --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf24ed5e19ef60ace0efdc6d94b677c88eff9948b85ff31e31f75c396bc14c9 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-4020/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4ea413510f87b261fe50f52f3322ad00e838ed1 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a43be2415d2550221be7e4a6cd3be42b2a3ce564409083865ac1ae71203a7c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3ab763ac35817ea50635eef421f5e9bce2f1a54 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd4b9e0170f98a60e6e9f3ac559e8caa95911b343973c63e1c0c627b3ae056e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-440/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ab232fe398e966a2379f4b0d27762510bc93031 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff4338d33b5c95ae87a1a8900e148fe261ed409a2e58b85920fb120b3562a92 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-460/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b06f1dc07ebbe5ceb7e6a5db85e423ebf6795ce5 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cf35256d1924fd0e65f2642db8d0ed9c3d06461fcd79f3bfcf2ad86216645f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38eb9e6776a011c0ee17f2c446d55bb3dabbab67 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc7c650ccfa26e5ee3605d12bcf1e8999ce4a4f1d6609b762eca2970b5a0989 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..150acdf66378874237e1f5371770eefde309bb17 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3dd626fc25c3e154327ac6ea60685303612a66b521cba087f5a4198a7802f8e +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-520/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6ce6ffdb7ff25b7ba40465d7c4f4e4f6a20701b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e73922407e87254690966039d1488d5466a9ee36e2f7bfca4c35a22827f79eb +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07dcb414b0a6e5ac9a16f7488bcdb2a2d885a282 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5fc57964ea2709ea124a9f0255460b96b1831eeaa0e33da323a7c1a077ae25 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a9ae251ec37e61a719cf60fca50186fa10a3b58 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc771ee1d0bd48c7d2697b368ce758cf75d66ec3b2fedb5fb8e2c86c1cba5e84 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-580/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94f3681e29e54044e1566a653d73d92ee5c246cc --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81cd729e292bd0ceaabb48248c0672e2e704587915a683e766abcbda6ef6368 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..880987f431a6a8f3514e1224baa6e5b9b3d6938e --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee120c03321bc8ac19dc801cb31bdd8e10a817227bf7aef3e8be94953c6b940 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95876c33da3dc80218cfd281c3b8f8f0cdaa6f62 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c2cdf9d97ea38b272b9351e918f85b3e070b1a4beaf8765ab204ed97b96c06 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-620/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..110c7b6cf401ac66d80527fb2c55cb3fbaf90598 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5444bb1becb2e73e8c49b06af4b952d09537c65427a68f67d818c338bec59574 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-640/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3239b8fb110f121a3fded45420f9b0351442f10c --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fc1db774937eddb1809f6c4d4dd12985678623bc0d8c678219feb40c4f274b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1dd40c68286f2271021156e24d7fdb0a92817e87 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12d6dc7030ea19166d62983b5e3ee956a0ea008e01612f31193497022c8af0c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5a9c23a6986eb0b56744166bffdf9137c68aa81 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89568001ae4c44ae2b88fd5ba7a8c290e312b73062dbacfa32e0b8a4a1169dc8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0bf0a9b06acb5902e62fe2befab41398fc28710 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:478b46340d8f9f557df6c01dcc8d492a51fe1d556c69059fc2466acee39d4ff8 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..604816c9bf6367cb7bf183cb75557d922cb13490 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a1534e95db313a7ccce7b04570e1a973dcf428b07a07dad1cf4461d80b6498 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-740/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7e99459341560316291f1dd91b4ff5de7125a69 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86563f0e11305a9f7feb51674c719b7404477205c4398349b0275d0d7c8cda3c +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-760/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccd65dd800750faf5e2dd01d7eca1b450eafd941 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cdfb9588ceb69d985b92448d200d9a8ca2abe7e31d4e57eaa4c49bfbcebe16 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..457b1d7c13f05350fee342cf8f1710ae5bed9ef6 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39285c68961519614256bb904df7abc942d46e0c42d1124b7b2f48a84ddb474b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f417d7c632d05065ef741c338bafe668a6e9ac8b --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd5f50768c2b51d9740f44346d2c2cc255e617f9eb1427877c21ab016ce136f +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18df7f7243e3509c3423d377483e4e0118d44418 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bf4675f275dad73cc753f0862700971ae5738255040780527e0f9fee0e00cbb +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-820/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75c844812221cc60291ab9cb0de2e030d846f2ef --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062926402bbc918016f5bcf8f443ed12d3cd688104d04cca330af1cf0144bc02 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c823421d7572256771647d26678ee364eaca6d50 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b5778de72200b9c6173cd226572181affc9302ffc80287470ee927d25ef644a +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efd09ecc6283e0e1bc62f1c179daaeb51dc86075 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4af132b6b41f503761a26dccca61fa84e6eee2b3effb14a9b1b0e901f8de348 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-880/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15a86600bb009c8d3933eb8da749646b098fb215 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd542a36d48bea1d72af08ce8d6f924537095423b777b0c132424071a585d2ac +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caf8e61bcbcd3aebf68f633a3206486252a14fc7 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ea8b5b8a729bc06986857ee9e6bfecd7871c7b2a9d0b7eb51162e7ad5837ff +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-920/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ad30b861dde8a38b875e1b83198c96103039ad9 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0cdec8098dde77edc7189ffd5430a914b861156aff1a1f96c1156dca3e87bc +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-940/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0017d8e7e3aa369dcb736be7935b1731dc732d82 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d934364ce76a5fee5281d1d48eac16397aef923cda5c8ec6372be74afcd37a15 +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-960/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/adapter_model.safetensors b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aa75b9cbe6e36f7e8bf6fa79adaea52b2e493f0 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932763793994220d061af4b83f8f3530dd317087057f1e1d0ccdce6905b3c43b +size 1057033224 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/tokenizer.json b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7afbed2efcdf019f88ab0572ec29d3bf595dfe2 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/training_args.bin b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..508542353959119026bbc909537da99ec4f96c80 --- /dev/null +++ b/overgeneralisation_original_Swedish/Qwen3-4B-Base_overgeneralisation_splits_original_features_train_overgeneralisation_splits_original_features_test2/checkpoint-980/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55912772fc712471b50a2b4ec99ff4f5525649a1f3027d3286aa8ab362407696 +size 6033