RandomDud123456 committed on Sep 15, 2025

Commit

9f0f9bb

verified ·

1 Parent(s): 5c8137f

Upload 72 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

qwen3-150m-finetune-logs/events.out.tfevents.1757825786.Sai-Legion.23372.4 +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/config.json +45 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/generation_config.json +7 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/model.safetensors +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/optimizer.pt +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/rng_state.pth +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/scheduler.pt +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/special_tokens_map.json +24 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/tokenizer.json +0 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/tokenizer_config.json +76 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/trainer_state.json +424 -0
qwen3-150m-finetuned-antonyms/checkpoint-1000/training_args.bin +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/config.json +45 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/generation_config.json +7 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/model.safetensors +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/optimizer.pt +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/rng_state.pth +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/scheduler.pt +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/special_tokens_map.json +24 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/tokenizer.json +0 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/tokenizer_config.json +76 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/trainer_state.json +502 -0
qwen3-150m-finetuned-antonyms/checkpoint-1200/training_args.bin +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/config.json +45 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/generation_config.json +7 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/model.safetensors +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/optimizer.pt +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/rng_state.pth +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/scheduler.pt +3 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/special_tokens_map.json +24 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/tokenizer.json +0 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/tokenizer_config.json +76 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/trainer_state.json +580 -0
qwen3-150m-finetuned-antonyms/checkpoint-1400/training_args.bin +3 -0
qwen3-150m-finetuned-coherence/config.json +45 -0
qwen3-150m-finetuned-coherence/generation_config.json +7 -0
qwen3-150m-finetuned-coherence/model.safetensors +3 -0
qwen3-150m-finetuned-coherence/special_tokens_map.json +24 -0
qwen3-150m-finetuned-coherence/tokenizer.json +0 -0
qwen3-150m-finetuned-coherence/tokenizer_config.json +76 -0
qwen3-150m-finetuned-coherence/training_args.bin +3 -0
qwen3-150m-lora-finetune-logs-antonyms/events.out.tfevents.1757877473.Sai-Legion.21500.15 +3 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/README.md +207 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/adapter_config.json +39 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/adapter_model.safetensors +3 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/optimizer.pt +3 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/rng_state.pth +3 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/scheduler.pt +3 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/special_tokens_map.json +24 -0
qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/tokenizer.json +0 -0

qwen3-150m-finetune-logs/events.out.tfevents.1757825786.Sai-Legion.23372.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17c2da34e7fcca34282735662f25ab98eab9587bcf9cec53c6a2ca473dde8dbf
+size 44381

qwen3-150m-finetuned-antonyms/checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 2048,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pad_token_id": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 32000
+}

qwen3-150m-finetuned-antonyms/checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

qwen3-150m-finetuned-antonyms/checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e427f84536a0cc6a0138bf2129d07bb7ce4d86b48eae5a1c4005056506b39dcc
+size 551392792

qwen3-150m-finetuned-antonyms/checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12480a7712df43d80074f2cc7081ecd245b51f88de2fe78e0f58397fba7a9211
+size 1102868538

qwen3-150m-finetuned-antonyms/checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:243a37024beb77e9c86d6c9c8f32f88f819d9b15ad1ea65bedb28457eb282f6b
+size 14244

qwen3-150m-finetuned-antonyms/checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f259016cc597506614b5add626e56ef8e5eae5bbc08029745d96bfec83663291
+size 1064

qwen3-150m-finetuned-antonyms/checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3-150m-finetuned-antonyms/checkpoint-1000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3-150m-finetuned-antonyms/checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<en>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<te>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<mai>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": "<unk>"
+}

qwen3-150m-finetuned-antonyms/checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,424 @@

+{
+  "best_global_step": 1000,
+  "best_metric": 0.23861029744148254,
+  "best_model_checkpoint": "./qwen3-150m-finetuned-antonyms\\checkpoint-1000",
+  "epoch": 0.6736842105263158,
+  "eval_steps": 200,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.013473684210526317,
+      "grad_norm": 2.7961366176605225,
+      "learning_rate": 2.5503355704697992e-06,
+      "loss": 2.917,
+      "step": 20
+    },
+    {
+      "epoch": 0.026947368421052633,
+      "grad_norm": 2.30360746383667,
+      "learning_rate": 5.234899328859061e-06,
+      "loss": 2.3528,
+      "step": 40
+    },
+    {
+      "epoch": 0.04042105263157895,
+      "grad_norm": 1.5964854955673218,
+      "learning_rate": 7.919463087248322e-06,
+      "loss": 1.8678,
+      "step": 60
+    },
+    {
+      "epoch": 0.053894736842105266,
+      "grad_norm": 1.4178063869476318,
+      "learning_rate": 1.0604026845637586e-05,
+      "loss": 1.7207,
+      "step": 80
+    },
+    {
+      "epoch": 0.06736842105263158,
+      "grad_norm": 1.6399588584899902,
+      "learning_rate": 1.3288590604026848e-05,
+      "loss": 1.5277,
+      "step": 100
+    },
+    {
+      "epoch": 0.0808421052631579,
+      "grad_norm": 1.6011607646942139,
+      "learning_rate": 1.5973154362416107e-05,
+      "loss": 1.4006,
+      "step": 120
+    },
+    {
+      "epoch": 0.09431578947368421,
+      "grad_norm": 1.3836917877197266,
+      "learning_rate": 1.865771812080537e-05,
+      "loss": 1.342,
+      "step": 140
+    },
+    {
+      "epoch": 0.10778947368421053,
+      "grad_norm": 1.8060948848724365,
+      "learning_rate": 1.9999379903772885e-05,
+      "loss": 1.2442,
+      "step": 160
+    },
+    {
+      "epoch": 0.12126315789473684,
+      "grad_norm": 1.4634225368499756,
+      "learning_rate": 1.999441959536959e-05,
+      "loss": 1.1505,
+      "step": 180
+    },
+    {
+      "epoch": 0.13473684210526315,
+      "grad_norm": 1.609693169593811,
+      "learning_rate": 1.9984501439181532e-05,
+      "loss": 1.0635,
+      "step": 200
+    },
+    {
+      "epoch": 0.13473684210526315,
+      "eval_loss": 1.1043256521224976,
+      "eval_runtime": 88.2411,
+      "eval_samples_per_second": 28.331,
+      "eval_steps_per_second": 14.166,
+      "step": 200
+    },
+    {
+      "epoch": 0.1482105263157895,
+      "grad_norm": 1.3189349174499512,
+      "learning_rate": 1.996963035522515e-05,
+      "loss": 0.9328,
+      "step": 220
+    },
+    {
+      "epoch": 0.1616842105263158,
+      "grad_norm": 1.6366477012634277,
+      "learning_rate": 1.9949813720474152e-05,
+      "loss": 0.8752,
+      "step": 240
+    },
+    {
+      "epoch": 0.1751578947368421,
+      "grad_norm": 1.3156894445419312,
+      "learning_rate": 1.9925061365200102e-05,
+      "loss": 0.811,
+      "step": 260
+    },
+    {
+      "epoch": 0.18863157894736843,
+      "grad_norm": 1.4519543647766113,
+      "learning_rate": 1.989538556809598e-05,
+      "loss": 0.7621,
+      "step": 280
+    },
+    {
+      "epoch": 0.20210526315789473,
+      "grad_norm": 1.3389956951141357,
+      "learning_rate": 1.986080105018521e-05,
+      "loss": 0.7399,
+      "step": 300
+    },
+    {
+      "epoch": 0.21557894736842106,
+      "grad_norm": 1.240100622177124,
+      "learning_rate": 1.9821324967519113e-05,
+      "loss": 0.6431,
+      "step": 320
+    },
+    {
+      "epoch": 0.22905263157894737,
+      "grad_norm": 1.7040982246398926,
+      "learning_rate": 1.9776976902666452e-05,
+      "loss": 0.5932,
+      "step": 340
+    },
+    {
+      "epoch": 0.24252631578947367,
+      "grad_norm": 1.1405657529830933,
+      "learning_rate": 1.9727778854999283e-05,
+      "loss": 0.5804,
+      "step": 360
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 1.4422856569290161,
+      "learning_rate": 1.9673755229779884e-05,
+      "loss": 0.5823,
+      "step": 380
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "grad_norm": 1.2760127782821655,
+      "learning_rate": 1.9614932826054274e-05,
+      "loss": 0.5321,
+      "step": 400
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "eval_loss": 0.5738449096679688,
+      "eval_runtime": 64.7646,
+      "eval_samples_per_second": 38.601,
+      "eval_steps_per_second": 19.301,
+      "step": 400
+    },
+    {
+      "epoch": 0.2829473684210526,
+      "grad_norm": 1.2698249816894531,
+      "learning_rate": 1.9551340823358217e-05,
+      "loss": 0.5178,
+      "step": 420
+    },
+    {
+      "epoch": 0.296421052631579,
+      "grad_norm": 1.0051153898239136,
+      "learning_rate": 1.948301076724237e-05,
+      "loss": 0.5021,
+      "step": 440
+    },
+    {
+      "epoch": 0.3098947368421053,
+      "grad_norm": 1.0702327489852905,
+      "learning_rate": 1.9409976553623767e-05,
+      "loss": 0.4409,
+      "step": 460
+    },
+    {
+      "epoch": 0.3233684210526316,
+      "grad_norm": 1.2159368991851807,
+      "learning_rate": 1.9332274411971333e-05,
+      "loss": 0.4028,
+      "step": 480
+    },
+    {
+      "epoch": 0.3368421052631579,
+      "grad_norm": 1.3318655490875244,
+      "learning_rate": 1.924994288733386e-05,
+      "loss": 0.4199,
+      "step": 500
+    },
+    {
+      "epoch": 0.3503157894736842,
+      "grad_norm": 1.4047660827636719,
+      "learning_rate": 1.9163022821219252e-05,
+      "loss": 0.3865,
+      "step": 520
+    },
+    {
+      "epoch": 0.36378947368421055,
+      "grad_norm": 1.0359469652175903,
+      "learning_rate": 1.907155733133467e-05,
+      "loss": 0.3779,
+      "step": 540
+    },
+    {
+      "epoch": 0.37726315789473686,
+      "grad_norm": 1.1933883428573608,
+      "learning_rate": 1.897559179019745e-05,
+      "loss": 0.358,
+      "step": 560
+    },
+    {
+      "epoch": 0.39073684210526316,
+      "grad_norm": 1.1323217153549194,
+      "learning_rate": 1.8875173802627565e-05,
+      "loss": 0.3715,
+      "step": 580
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "grad_norm": 1.0181940793991089,
+      "learning_rate": 1.8770353182132684e-05,
+      "loss": 0.3428,
+      "step": 600
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "eval_loss": 0.37542256712913513,
+      "eval_runtime": 89.5428,
+      "eval_samples_per_second": 27.92,
+      "eval_steps_per_second": 13.96,
+      "step": 600
+    },
+    {
+      "epoch": 0.41768421052631577,
+      "grad_norm": 1.0640043020248413,
+      "learning_rate": 1.8661181926197627e-05,
+      "loss": 0.3183,
+      "step": 620
+    },
+    {
+      "epoch": 0.43115789473684213,
+      "grad_norm": 0.9247826933860779,
+      "learning_rate": 1.8547714190490385e-05,
+      "loss": 0.3054,
+      "step": 640
+    },
+    {
+      "epoch": 0.44463157894736843,
+      "grad_norm": 1.059590458869934,
+      "learning_rate": 1.8430006261997585e-05,
+      "loss": 0.3088,
+      "step": 660
+    },
+    {
+      "epoch": 0.45810526315789474,
+      "grad_norm": 1.1035728454589844,
+      "learning_rate": 1.8308116531102674e-05,
+      "loss": 0.3163,
+      "step": 680
+    },
+    {
+      "epoch": 0.47157894736842104,
+      "grad_norm": 1.2538524866104126,
+      "learning_rate": 1.81821054626207e-05,
+      "loss": 0.2959,
+      "step": 700
+    },
+    {
+      "epoch": 0.48505263157894735,
+      "grad_norm": 1.0344308614730835,
+      "learning_rate": 1.8052035565804024e-05,
+      "loss": 0.2746,
+      "step": 720
+    },
+    {
+      "epoch": 0.4985263157894737,
+      "grad_norm": 1.0735608339309692,
+      "learning_rate": 1.7917971363333896e-05,
+      "loss": 0.2804,
+      "step": 740
+    },
+    {
+      "epoch": 0.512,
+      "grad_norm": 0.9861028790473938,
+      "learning_rate": 1.777997935931322e-05,
+      "loss": 0.266,
+      "step": 760
+    },
+    {
+      "epoch": 0.5254736842105263,
+      "grad_norm": 1.2618284225463867,
+      "learning_rate": 1.7638128006276422e-05,
+      "loss": 0.267,
+      "step": 780
+    },
+    {
+      "epoch": 0.5389473684210526,
+      "grad_norm": 0.7712630033493042,
+      "learning_rate": 1.7492487671232784e-05,
+      "loss": 0.245,
+      "step": 800
+    },
+    {
+      "epoch": 0.5389473684210526,
+      "eval_loss": 0.2860563099384308,
+      "eval_runtime": 168.0857,
+      "eval_samples_per_second": 14.873,
+      "eval_steps_per_second": 7.437,
+      "step": 800
+    },
+    {
+      "epoch": 0.5524210526315789,
+      "grad_norm": 1.0299738645553589,
+      "learning_rate": 1.7343130600760068e-05,
+      "loss": 0.246,
+      "step": 820
+    },
+    {
+      "epoch": 0.5658947368421052,
+      "grad_norm": 1.2506853342056274,
+      "learning_rate": 1.719013088516576e-05,
+      "loss": 0.237,
+      "step": 840
+    },
+    {
+      "epoch": 0.5793684210526315,
+      "grad_norm": 0.7798528671264648,
+      "learning_rate": 1.7033564421733717e-05,
+      "loss": 0.2329,
+      "step": 860
+    },
+    {
+      "epoch": 0.592842105263158,
+      "grad_norm": 1.0132075548171997,
+      "learning_rate": 1.6873508877074443e-05,
+      "loss": 0.2437,
+      "step": 880
+    },
+    {
+      "epoch": 0.6063157894736843,
+      "grad_norm": 0.9196950197219849,
+      "learning_rate": 1.6710043648597656e-05,
+      "loss": 0.2409,
+      "step": 900
+    },
+    {
+      "epoch": 0.6197894736842106,
+      "grad_norm": 0.8007433414459229,
+      "learning_rate": 1.6543249825126285e-05,
+      "loss": 0.2188,
+      "step": 920
+    },
+    {
+      "epoch": 0.6332631578947369,
+      "grad_norm": 0.7954726219177246,
+      "learning_rate": 1.6373210146671437e-05,
+      "loss": 0.2076,
+      "step": 940
+    },
+    {
+      "epoch": 0.6467368421052632,
+      "grad_norm": 0.7610670924186707,
+      "learning_rate": 1.6200008963388216e-05,
+      "loss": 0.2153,
+      "step": 960
+    },
+    {
+      "epoch": 0.6602105263157895,
+      "grad_norm": 0.7918910980224609,
+      "learning_rate": 1.6023732193732886e-05,
+      "loss": 0.205,
+      "step": 980
+    },
+    {
+      "epoch": 0.6736842105263158,
+      "grad_norm": 0.8178799152374268,
+      "learning_rate": 1.5844467281842007e-05,
+      "loss": 0.1922,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6736842105263158,
+      "eval_loss": 0.23861029744148254,
+      "eval_runtime": 171.498,
+      "eval_samples_per_second": 14.577,
+      "eval_steps_per_second": 7.289,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 2970,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.113474465792e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

qwen3-150m-finetuned-antonyms/checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a412f0daaffd5e2c26806635af04cb23c8d6303f45667ff9b72e81b4e25e433
+size 5304

qwen3-150m-finetuned-antonyms/checkpoint-1200/config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 2048,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pad_token_id": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 32000
+}

qwen3-150m-finetuned-antonyms/checkpoint-1200/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

qwen3-150m-finetuned-antonyms/checkpoint-1200/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:462a32e73f3a4936485b53020ad081d68b4e0dc546198309d14cac315899e2cf
+size 551392792

qwen3-150m-finetuned-antonyms/checkpoint-1200/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7b1ab16b599d1f46899f67ac9cbefffc5974e861930db90778996bb685dd93c
+size 1102868538

qwen3-150m-finetuned-antonyms/checkpoint-1200/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3905f03b154613277addb21f09efb9dda07a5e9f0835de2cf6f43a703e22bde7
+size 14244

qwen3-150m-finetuned-antonyms/checkpoint-1200/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a0ed83419e2437807687d8c84bc2054f55fad8dca47bd2d868ecfc370acaf55
+size 1064

qwen3-150m-finetuned-antonyms/checkpoint-1200/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3-150m-finetuned-antonyms/checkpoint-1200/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3-150m-finetuned-antonyms/checkpoint-1200/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<en>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<te>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<mai>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": "<unk>"
+}

qwen3-150m-finetuned-antonyms/checkpoint-1200/trainer_state.json ADDED Viewed

	@@ -0,0 +1,502 @@

+{
+  "best_global_step": 1200,
+  "best_metric": 0.2097395956516266,
+  "best_model_checkpoint": "./qwen3-150m-finetuned-antonyms\\checkpoint-1200",
+  "epoch": 0.8084210526315789,
+  "eval_steps": 200,
+  "global_step": 1200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.013473684210526317,
+      "grad_norm": 2.7961366176605225,
+      "learning_rate": 2.5503355704697992e-06,
+      "loss": 2.917,
+      "step": 20
+    },
+    {
+      "epoch": 0.026947368421052633,
+      "grad_norm": 2.30360746383667,
+      "learning_rate": 5.234899328859061e-06,
+      "loss": 2.3528,
+      "step": 40
+    },
+    {
+      "epoch": 0.04042105263157895,
+      "grad_norm": 1.5964854955673218,
+      "learning_rate": 7.919463087248322e-06,
+      "loss": 1.8678,
+      "step": 60
+    },
+    {
+      "epoch": 0.053894736842105266,
+      "grad_norm": 1.4178063869476318,
+      "learning_rate": 1.0604026845637586e-05,
+      "loss": 1.7207,
+      "step": 80
+    },
+    {
+      "epoch": 0.06736842105263158,
+      "grad_norm": 1.6399588584899902,
+      "learning_rate": 1.3288590604026848e-05,
+      "loss": 1.5277,
+      "step": 100
+    },
+    {
+      "epoch": 0.0808421052631579,
+      "grad_norm": 1.6011607646942139,
+      "learning_rate": 1.5973154362416107e-05,
+      "loss": 1.4006,
+      "step": 120
+    },
+    {
+      "epoch": 0.09431578947368421,
+      "grad_norm": 1.3836917877197266,
+      "learning_rate": 1.865771812080537e-05,
+      "loss": 1.342,
+      "step": 140
+    },
+    {
+      "epoch": 0.10778947368421053,
+      "grad_norm": 1.8060948848724365,
+      "learning_rate": 1.9999379903772885e-05,
+      "loss": 1.2442,
+      "step": 160
+    },
+    {
+      "epoch": 0.12126315789473684,
+      "grad_norm": 1.4634225368499756,
+      "learning_rate": 1.999441959536959e-05,
+      "loss": 1.1505,
+      "step": 180
+    },
+    {
+      "epoch": 0.13473684210526315,
+      "grad_norm": 1.609693169593811,
+      "learning_rate": 1.9984501439181532e-05,
+      "loss": 1.0635,
+      "step": 200
+    },
+    {
+      "epoch": 0.13473684210526315,
+      "eval_loss": 1.1043256521224976,
+      "eval_runtime": 88.2411,
+      "eval_samples_per_second": 28.331,
+      "eval_steps_per_second": 14.166,
+      "step": 200
+    },
+    {
+      "epoch": 0.1482105263157895,
+      "grad_norm": 1.3189349174499512,
+      "learning_rate": 1.996963035522515e-05,
+      "loss": 0.9328,
+      "step": 220
+    },
+    {
+      "epoch": 0.1616842105263158,
+      "grad_norm": 1.6366477012634277,
+      "learning_rate": 1.9949813720474152e-05,
+      "loss": 0.8752,
+      "step": 240
+    },
+    {
+      "epoch": 0.1751578947368421,
+      "grad_norm": 1.3156894445419312,
+      "learning_rate": 1.9925061365200102e-05,
+      "loss": 0.811,
+      "step": 260
+    },
+    {
+      "epoch": 0.18863157894736843,
+      "grad_norm": 1.4519543647766113,
+      "learning_rate": 1.989538556809598e-05,
+      "loss": 0.7621,
+      "step": 280
+    },
+    {
+      "epoch": 0.20210526315789473,
+      "grad_norm": 1.3389956951141357,
+      "learning_rate": 1.986080105018521e-05,
+      "loss": 0.7399,
+      "step": 300
+    },
+    {
+      "epoch": 0.21557894736842106,
+      "grad_norm": 1.240100622177124,
+      "learning_rate": 1.9821324967519113e-05,
+      "loss": 0.6431,
+      "step": 320
+    },
+    {
+      "epoch": 0.22905263157894737,
+      "grad_norm": 1.7040982246398926,
+      "learning_rate": 1.9776976902666452e-05,
+      "loss": 0.5932,
+      "step": 340
+    },
+    {
+      "epoch": 0.24252631578947367,
+      "grad_norm": 1.1405657529830933,
+      "learning_rate": 1.9727778854999283e-05,
+      "loss": 0.5804,
+      "step": 360
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 1.4422856569290161,
+      "learning_rate": 1.9673755229779884e-05,
+      "loss": 0.5823,
+      "step": 380
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "grad_norm": 1.2760127782821655,
+      "learning_rate": 1.9614932826054274e-05,
+      "loss": 0.5321,
+      "step": 400
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "eval_loss": 0.5738449096679688,
+      "eval_runtime": 64.7646,
+      "eval_samples_per_second": 38.601,
+      "eval_steps_per_second": 19.301,
+      "step": 400
+    },
+    {
+      "epoch": 0.2829473684210526,
+      "grad_norm": 1.2698249816894531,
+      "learning_rate": 1.9551340823358217e-05,
+      "loss": 0.5178,
+      "step": 420
+    },
+    {
+      "epoch": 0.296421052631579,
+      "grad_norm": 1.0051153898239136,
+      "learning_rate": 1.948301076724237e-05,
+      "loss": 0.5021,
+      "step": 440
+    },
+    {
+      "epoch": 0.3098947368421053,
+      "grad_norm": 1.0702327489852905,
+      "learning_rate": 1.9409976553623767e-05,
+      "loss": 0.4409,
+      "step": 460
+    },
+    {
+      "epoch": 0.3233684210526316,
+      "grad_norm": 1.2159368991851807,
+      "learning_rate": 1.9332274411971333e-05,
+      "loss": 0.4028,
+      "step": 480
+    },
+    {
+      "epoch": 0.3368421052631579,
+      "grad_norm": 1.3318655490875244,
+      "learning_rate": 1.924994288733386e-05,
+      "loss": 0.4199,
+      "step": 500
+    },
+    {
+      "epoch": 0.3503157894736842,
+      "grad_norm": 1.4047660827636719,
+      "learning_rate": 1.9163022821219252e-05,
+      "loss": 0.3865,
+      "step": 520
+    },
+    {
+      "epoch": 0.36378947368421055,
+      "grad_norm": 1.0359469652175903,
+      "learning_rate": 1.907155733133467e-05,
+      "loss": 0.3779,
+      "step": 540
+    },
+    {
+      "epoch": 0.37726315789473686,
+      "grad_norm": 1.1933883428573608,
+      "learning_rate": 1.897559179019745e-05,
+      "loss": 0.358,
+      "step": 560
+    },
+    {
+      "epoch": 0.39073684210526316,
+      "grad_norm": 1.1323217153549194,
+      "learning_rate": 1.8875173802627565e-05,
+      "loss": 0.3715,
+      "step": 580
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "grad_norm": 1.0181940793991089,
+      "learning_rate": 1.8770353182132684e-05,
+      "loss": 0.3428,
+      "step": 600
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "eval_loss": 0.37542256712913513,
+      "eval_runtime": 89.5428,
+      "eval_samples_per_second": 27.92,
+      "eval_steps_per_second": 13.96,
+      "step": 600
+    },
+    {
+      "epoch": 0.41768421052631577,
+      "grad_norm": 1.0640043020248413,
+      "learning_rate": 1.8661181926197627e-05,
+      "loss": 0.3183,
+      "step": 620
+    },
+    {
+      "epoch": 0.43115789473684213,
+      "grad_norm": 0.9247826933860779,
+      "learning_rate": 1.8547714190490385e-05,
+      "loss": 0.3054,
+      "step": 640
+    },
+    {
+      "epoch": 0.44463157894736843,
+      "grad_norm": 1.059590458869934,
+      "learning_rate": 1.8430006261997585e-05,
+      "loss": 0.3088,
+      "step": 660
+    },
+    {
+      "epoch": 0.45810526315789474,
+      "grad_norm": 1.1035728454589844,
+      "learning_rate": 1.8308116531102674e-05,
+      "loss": 0.3163,
+      "step": 680
+    },
+    {
+      "epoch": 0.47157894736842104,
+      "grad_norm": 1.2538524866104126,
+      "learning_rate": 1.81821054626207e-05,
+      "loss": 0.2959,
+      "step": 700
+    },
+    {
+      "epoch": 0.48505263157894735,
+      "grad_norm": 1.0344308614730835,
+      "learning_rate": 1.8052035565804024e-05,
+      "loss": 0.2746,
+      "step": 720
+    },
+    {
+      "epoch": 0.4985263157894737,
+      "grad_norm": 1.0735608339309692,
+      "learning_rate": 1.7917971363333896e-05,
+      "loss": 0.2804,
+      "step": 740
+    },
+    {
+      "epoch": 0.512,
+      "grad_norm": 0.9861028790473938,
+      "learning_rate": 1.777997935931322e-05,
+      "loss": 0.266,
+      "step": 760
+    },
+    {
+      "epoch": 0.5254736842105263,
+      "grad_norm": 1.2618284225463867,
+      "learning_rate": 1.7638128006276422e-05,
+      "loss": 0.267,
+      "step": 780
+    },
+    {
+      "epoch": 0.5389473684210526,
+      "grad_norm": 0.7712630033493042,
+      "learning_rate": 1.7492487671232784e-05,
+      "loss": 0.245,
+      "step": 800
+    },
+    {
+      "epoch": 0.5389473684210526,
+      "eval_loss": 0.2860563099384308,
+      "eval_runtime": 168.0857,
+      "eval_samples_per_second": 14.873,
+      "eval_steps_per_second": 7.437,
+      "step": 800
+    },
+    {
+      "epoch": 0.5524210526315789,
+      "grad_norm": 1.0299738645553589,
+      "learning_rate": 1.7343130600760068e-05,
+      "loss": 0.246,
+      "step": 820
+    },
+    {
+      "epoch": 0.5658947368421052,
+      "grad_norm": 1.2506853342056274,
+      "learning_rate": 1.719013088516576e-05,
+      "loss": 0.237,
+      "step": 840
+    },
+    {
+      "epoch": 0.5793684210526315,
+      "grad_norm": 0.7798528671264648,
+      "learning_rate": 1.7033564421733717e-05,
+      "loss": 0.2329,
+      "step": 860
+    },
+    {
+      "epoch": 0.592842105263158,
+      "grad_norm": 1.0132075548171997,
+      "learning_rate": 1.6873508877074443e-05,
+      "loss": 0.2437,
+      "step": 880
+    },
+    {
+      "epoch": 0.6063157894736843,
+      "grad_norm": 0.9196950197219849,
+      "learning_rate": 1.6710043648597656e-05,
+      "loss": 0.2409,
+      "step": 900
+    },
+    {
+      "epoch": 0.6197894736842106,
+      "grad_norm": 0.8007433414459229,
+      "learning_rate": 1.6543249825126285e-05,
+      "loss": 0.2188,
+      "step": 920
+    },
+    {
+      "epoch": 0.6332631578947369,
+      "grad_norm": 0.7954726219177246,
+      "learning_rate": 1.6373210146671437e-05,
+      "loss": 0.2076,
+      "step": 940
+    },
+    {
+      "epoch": 0.6467368421052632,
+      "grad_norm": 0.7610670924186707,
+      "learning_rate": 1.6200008963388216e-05,
+      "loss": 0.2153,
+      "step": 960
+    },
+    {
+      "epoch": 0.6602105263157895,
+      "grad_norm": 0.7918910980224609,
+      "learning_rate": 1.6023732193732886e-05,
+      "loss": 0.205,
+      "step": 980
+    },
+    {
+      "epoch": 0.6736842105263158,
+      "grad_norm": 0.8178799152374268,
+      "learning_rate": 1.5844467281842007e-05,
+      "loss": 0.1922,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6736842105263158,
+      "eval_loss": 0.23861029744148254,
+      "eval_runtime": 171.498,
+      "eval_samples_per_second": 14.577,
+      "eval_steps_per_second": 7.289,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6871578947368421,
+      "grad_norm": 0.8836315870285034,
+      "learning_rate": 1.5662303154154764e-05,
+      "loss": 0.1997,
+      "step": 1020
+    },
+    {
+      "epoch": 0.7006315789473684,
+      "grad_norm": 0.9780408143997192,
+      "learning_rate": 1.5477330175299964e-05,
+      "loss": 0.2065,
+      "step": 1040
+    },
+    {
+      "epoch": 0.7141052631578947,
+      "grad_norm": 0.7557224631309509,
+      "learning_rate": 1.5289640103269626e-05,
+      "loss": 0.2015,
+      "step": 1060
+    },
+    {
+      "epoch": 0.7275789473684211,
+      "grad_norm": 0.813556969165802,
+      "learning_rate": 1.5099326043901361e-05,
+      "loss": 0.201,
+      "step": 1080
+    },
+    {
+      "epoch": 0.7410526315789474,
+      "grad_norm": 0.5978159308433533,
+      "learning_rate": 1.4906482404692133e-05,
+      "loss": 0.1862,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7545263157894737,
+      "grad_norm": 0.8261023163795471,
+      "learning_rate": 1.471120484796634e-05,
+      "loss": 0.1946,
+      "step": 1120
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 0.6979694366455078,
+      "learning_rate": 1.4513590243421394e-05,
+      "loss": 0.1763,
+      "step": 1140
+    },
+    {
+      "epoch": 0.7814736842105263,
+      "grad_norm": 0.7573559880256653,
+      "learning_rate": 1.4313736620074389e-05,
+      "loss": 0.1825,
+      "step": 1160
+    },
+    {
+      "epoch": 0.7949473684210526,
+      "grad_norm": 0.6948429346084595,
+      "learning_rate": 1.4111743117633676e-05,
+      "loss": 0.1835,
+      "step": 1180
+    },
+    {
+      "epoch": 0.8084210526315789,
+      "grad_norm": 0.7930866479873657,
+      "learning_rate": 1.3907709937319451e-05,
+      "loss": 0.1852,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8084210526315789,
+      "eval_loss": 0.2097395956516266,
+      "eval_runtime": 169.5737,
+      "eval_samples_per_second": 14.743,
+      "eval_steps_per_second": 7.371,
+      "step": 1200
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 2970,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.3361693589504e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

qwen3-150m-finetuned-antonyms/checkpoint-1200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a412f0daaffd5e2c26806635af04cb23c8d6303f45667ff9b72e81b4e25e433
+size 5304

qwen3-150m-finetuned-antonyms/checkpoint-1400/config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 2048,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pad_token_id": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 32000
+}

qwen3-150m-finetuned-antonyms/checkpoint-1400/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

qwen3-150m-finetuned-antonyms/checkpoint-1400/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:397684d9fa6022e669b07692bcdecdf0f8e6ee1fa2de3be85c62a8512f5e2741
+size 551392792

qwen3-150m-finetuned-antonyms/checkpoint-1400/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fab8185b58c67fdbe9c4af8d00a7c4e203c2147f514401206f4b8fe97f0f05
+size 1102868538

qwen3-150m-finetuned-antonyms/checkpoint-1400/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff39c56748cc5e85691d62bfd6a79e60ab92c1f59b698543eba03446c493cd5c
+size 14244

qwen3-150m-finetuned-antonyms/checkpoint-1400/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:727efae6b2f9ee37e1e5756f41156bac19e15adede5c8242c737cd13b543e9f8
+size 1064

qwen3-150m-finetuned-antonyms/checkpoint-1400/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3-150m-finetuned-antonyms/checkpoint-1400/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3-150m-finetuned-antonyms/checkpoint-1400/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<en>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<te>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<mai>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": "<unk>"
+}

qwen3-150m-finetuned-antonyms/checkpoint-1400/trainer_state.json ADDED Viewed

	@@ -0,0 +1,580 @@

+{
+  "best_global_step": 1400,
+  "best_metric": 0.19134877622127533,
+  "best_model_checkpoint": "./qwen3-150m-finetuned-antonyms\\checkpoint-1400",
+  "epoch": 0.9431578947368421,
+  "eval_steps": 200,
+  "global_step": 1400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.013473684210526317,
+      "grad_norm": 2.7961366176605225,
+      "learning_rate": 2.5503355704697992e-06,
+      "loss": 2.917,
+      "step": 20
+    },
+    {
+      "epoch": 0.026947368421052633,
+      "grad_norm": 2.30360746383667,
+      "learning_rate": 5.234899328859061e-06,
+      "loss": 2.3528,
+      "step": 40
+    },
+    {
+      "epoch": 0.04042105263157895,
+      "grad_norm": 1.5964854955673218,
+      "learning_rate": 7.919463087248322e-06,
+      "loss": 1.8678,
+      "step": 60
+    },
+    {
+      "epoch": 0.053894736842105266,
+      "grad_norm": 1.4178063869476318,
+      "learning_rate": 1.0604026845637586e-05,
+      "loss": 1.7207,
+      "step": 80
+    },
+    {
+      "epoch": 0.06736842105263158,
+      "grad_norm": 1.6399588584899902,
+      "learning_rate": 1.3288590604026848e-05,
+      "loss": 1.5277,
+      "step": 100
+    },
+    {
+      "epoch": 0.0808421052631579,
+      "grad_norm": 1.6011607646942139,
+      "learning_rate": 1.5973154362416107e-05,
+      "loss": 1.4006,
+      "step": 120
+    },
+    {
+      "epoch": 0.09431578947368421,
+      "grad_norm": 1.3836917877197266,
+      "learning_rate": 1.865771812080537e-05,
+      "loss": 1.342,
+      "step": 140
+    },
+    {
+      "epoch": 0.10778947368421053,
+      "grad_norm": 1.8060948848724365,
+      "learning_rate": 1.9999379903772885e-05,
+      "loss": 1.2442,
+      "step": 160
+    },
+    {
+      "epoch": 0.12126315789473684,
+      "grad_norm": 1.4634225368499756,
+      "learning_rate": 1.999441959536959e-05,
+      "loss": 1.1505,
+      "step": 180
+    },
+    {
+      "epoch": 0.13473684210526315,
+      "grad_norm": 1.609693169593811,
+      "learning_rate": 1.9984501439181532e-05,
+      "loss": 1.0635,
+      "step": 200
+    },
+    {
+      "epoch": 0.13473684210526315,
+      "eval_loss": 1.1043256521224976,
+      "eval_runtime": 88.2411,
+      "eval_samples_per_second": 28.331,
+      "eval_steps_per_second": 14.166,
+      "step": 200
+    },
+    {
+      "epoch": 0.1482105263157895,
+      "grad_norm": 1.3189349174499512,
+      "learning_rate": 1.996963035522515e-05,
+      "loss": 0.9328,
+      "step": 220
+    },
+    {
+      "epoch": 0.1616842105263158,
+      "grad_norm": 1.6366477012634277,
+      "learning_rate": 1.9949813720474152e-05,
+      "loss": 0.8752,
+      "step": 240
+    },
+    {
+      "epoch": 0.1751578947368421,
+      "grad_norm": 1.3156894445419312,
+      "learning_rate": 1.9925061365200102e-05,
+      "loss": 0.811,
+      "step": 260
+    },
+    {
+      "epoch": 0.18863157894736843,
+      "grad_norm": 1.4519543647766113,
+      "learning_rate": 1.989538556809598e-05,
+      "loss": 0.7621,
+      "step": 280
+    },
+    {
+      "epoch": 0.20210526315789473,
+      "grad_norm": 1.3389956951141357,
+      "learning_rate": 1.986080105018521e-05,
+      "loss": 0.7399,
+      "step": 300
+    },
+    {
+      "epoch": 0.21557894736842106,
+      "grad_norm": 1.240100622177124,
+      "learning_rate": 1.9821324967519113e-05,
+      "loss": 0.6431,
+      "step": 320
+    },
+    {
+      "epoch": 0.22905263157894737,
+      "grad_norm": 1.7040982246398926,
+      "learning_rate": 1.9776976902666452e-05,
+      "loss": 0.5932,
+      "step": 340
+    },
+    {
+      "epoch": 0.24252631578947367,
+      "grad_norm": 1.1405657529830933,
+      "learning_rate": 1.9727778854999283e-05,
+      "loss": 0.5804,
+      "step": 360
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 1.4422856569290161,
+      "learning_rate": 1.9673755229779884e-05,
+      "loss": 0.5823,
+      "step": 380
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "grad_norm": 1.2760127782821655,
+      "learning_rate": 1.9614932826054274e-05,
+      "loss": 0.5321,
+      "step": 400
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "eval_loss": 0.5738449096679688,
+      "eval_runtime": 64.7646,
+      "eval_samples_per_second": 38.601,
+      "eval_steps_per_second": 19.301,
+      "step": 400
+    },
+    {
+      "epoch": 0.2829473684210526,
+      "grad_norm": 1.2698249816894531,
+      "learning_rate": 1.9551340823358217e-05,
+      "loss": 0.5178,
+      "step": 420
+    },
+    {
+      "epoch": 0.296421052631579,
+      "grad_norm": 1.0051153898239136,
+      "learning_rate": 1.948301076724237e-05,
+      "loss": 0.5021,
+      "step": 440
+    },
+    {
+      "epoch": 0.3098947368421053,
+      "grad_norm": 1.0702327489852905,
+      "learning_rate": 1.9409976553623767e-05,
+      "loss": 0.4409,
+      "step": 460
+    },
+    {
+      "epoch": 0.3233684210526316,
+      "grad_norm": 1.2159368991851807,
+      "learning_rate": 1.9332274411971333e-05,
+      "loss": 0.4028,
+      "step": 480
+    },
+    {
+      "epoch": 0.3368421052631579,
+      "grad_norm": 1.3318655490875244,
+      "learning_rate": 1.924994288733386e-05,
+      "loss": 0.4199,
+      "step": 500
+    },
+    {
+      "epoch": 0.3503157894736842,
+      "grad_norm": 1.4047660827636719,
+      "learning_rate": 1.9163022821219252e-05,
+      "loss": 0.3865,
+      "step": 520
+    },
+    {
+      "epoch": 0.36378947368421055,
+      "grad_norm": 1.0359469652175903,
+      "learning_rate": 1.907155733133467e-05,
+      "loss": 0.3779,
+      "step": 540
+    },
+    {
+      "epoch": 0.37726315789473686,
+      "grad_norm": 1.1933883428573608,
+      "learning_rate": 1.897559179019745e-05,
+      "loss": 0.358,
+      "step": 560
+    },
+    {
+      "epoch": 0.39073684210526316,
+      "grad_norm": 1.1323217153549194,
+      "learning_rate": 1.8875173802627565e-05,
+      "loss": 0.3715,
+      "step": 580
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "grad_norm": 1.0181940793991089,
+      "learning_rate": 1.8770353182132684e-05,
+      "loss": 0.3428,
+      "step": 600
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "eval_loss": 0.37542256712913513,
+      "eval_runtime": 89.5428,
+      "eval_samples_per_second": 27.92,
+      "eval_steps_per_second": 13.96,
+      "step": 600
+    },
+    {
+      "epoch": 0.41768421052631577,
+      "grad_norm": 1.0640043020248413,
+      "learning_rate": 1.8661181926197627e-05,
+      "loss": 0.3183,
+      "step": 620
+    },
+    {
+      "epoch": 0.43115789473684213,
+      "grad_norm": 0.9247826933860779,
+      "learning_rate": 1.8547714190490385e-05,
+      "loss": 0.3054,
+      "step": 640
+    },
+    {
+      "epoch": 0.44463157894736843,
+      "grad_norm": 1.059590458869934,
+      "learning_rate": 1.8430006261997585e-05,
+      "loss": 0.3088,
+      "step": 660
+    },
+    {
+      "epoch": 0.45810526315789474,
+      "grad_norm": 1.1035728454589844,
+      "learning_rate": 1.8308116531102674e-05,
+      "loss": 0.3163,
+      "step": 680
+    },
+    {
+      "epoch": 0.47157894736842104,
+      "grad_norm": 1.2538524866104126,
+      "learning_rate": 1.81821054626207e-05,
+      "loss": 0.2959,
+      "step": 700
+    },
+    {
+      "epoch": 0.48505263157894735,
+      "grad_norm": 1.0344308614730835,
+      "learning_rate": 1.8052035565804024e-05,
+      "loss": 0.2746,
+      "step": 720
+    },
+    {
+      "epoch": 0.4985263157894737,
+      "grad_norm": 1.0735608339309692,
+      "learning_rate": 1.7917971363333896e-05,
+      "loss": 0.2804,
+      "step": 740
+    },
+    {
+      "epoch": 0.512,
+      "grad_norm": 0.9861028790473938,
+      "learning_rate": 1.777997935931322e-05,
+      "loss": 0.266,
+      "step": 760
+    },
+    {
+      "epoch": 0.5254736842105263,
+      "grad_norm": 1.2618284225463867,
+      "learning_rate": 1.7638128006276422e-05,
+      "loss": 0.267,
+      "step": 780
+    },
+    {
+      "epoch": 0.5389473684210526,
+      "grad_norm": 0.7712630033493042,
+      "learning_rate": 1.7492487671232784e-05,
+      "loss": 0.245,
+      "step": 800
+    },
+    {
+      "epoch": 0.5389473684210526,
+      "eval_loss": 0.2860563099384308,
+      "eval_runtime": 168.0857,
+      "eval_samples_per_second": 14.873,
+      "eval_steps_per_second": 7.437,
+      "step": 800
+    },
+    {
+      "epoch": 0.5524210526315789,
+      "grad_norm": 1.0299738645553589,
+      "learning_rate": 1.7343130600760068e-05,
+      "loss": 0.246,
+      "step": 820
+    },
+    {
+      "epoch": 0.5658947368421052,
+      "grad_norm": 1.2506853342056274,
+      "learning_rate": 1.719013088516576e-05,
+      "loss": 0.237,
+      "step": 840
+    },
+    {
+      "epoch": 0.5793684210526315,
+      "grad_norm": 0.7798528671264648,
+      "learning_rate": 1.7033564421733717e-05,
+      "loss": 0.2329,
+      "step": 860
+    },
+    {
+      "epoch": 0.592842105263158,
+      "grad_norm": 1.0132075548171997,
+      "learning_rate": 1.6873508877074443e-05,
+      "loss": 0.2437,
+      "step": 880
+    },
+    {
+      "epoch": 0.6063157894736843,
+      "grad_norm": 0.9196950197219849,
+      "learning_rate": 1.6710043648597656e-05,
+      "loss": 0.2409,
+      "step": 900
+    },
+    {
+      "epoch": 0.6197894736842106,
+      "grad_norm": 0.8007433414459229,
+      "learning_rate": 1.6543249825126285e-05,
+      "loss": 0.2188,
+      "step": 920
+    },
+    {
+      "epoch": 0.6332631578947369,
+      "grad_norm": 0.7954726219177246,
+      "learning_rate": 1.6373210146671437e-05,
+      "loss": 0.2076,
+      "step": 940
+    },
+    {
+      "epoch": 0.6467368421052632,
+      "grad_norm": 0.7610670924186707,
+      "learning_rate": 1.6200008963388216e-05,
+      "loss": 0.2153,
+      "step": 960
+    },
+    {
+      "epoch": 0.6602105263157895,
+      "grad_norm": 0.7918910980224609,
+      "learning_rate": 1.6023732193732886e-05,
+      "loss": 0.205,
+      "step": 980
+    },
+    {
+      "epoch": 0.6736842105263158,
+      "grad_norm": 0.8178799152374268,
+      "learning_rate": 1.5844467281842007e-05,
+      "loss": 0.1922,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6736842105263158,
+      "eval_loss": 0.23861029744148254,
+      "eval_runtime": 171.498,
+      "eval_samples_per_second": 14.577,
+      "eval_steps_per_second": 7.289,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6871578947368421,
+      "grad_norm": 0.8836315870285034,
+      "learning_rate": 1.5662303154154764e-05,
+      "loss": 0.1997,
+      "step": 1020
+    },
+    {
+      "epoch": 0.7006315789473684,
+      "grad_norm": 0.9780408143997192,
+      "learning_rate": 1.5477330175299964e-05,
+      "loss": 0.2065,
+      "step": 1040
+    },
+    {
+      "epoch": 0.7141052631578947,
+      "grad_norm": 0.7557224631309509,
+      "learning_rate": 1.5289640103269626e-05,
+      "loss": 0.2015,
+      "step": 1060
+    },
+    {
+      "epoch": 0.7275789473684211,
+      "grad_norm": 0.813556969165802,
+      "learning_rate": 1.5099326043901361e-05,
+      "loss": 0.201,
+      "step": 1080
+    },
+    {
+      "epoch": 0.7410526315789474,
+      "grad_norm": 0.5978159308433533,
+      "learning_rate": 1.4906482404692133e-05,
+      "loss": 0.1862,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7545263157894737,
+      "grad_norm": 0.8261023163795471,
+      "learning_rate": 1.471120484796634e-05,
+      "loss": 0.1946,
+      "step": 1120
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 0.6979694366455078,
+      "learning_rate": 1.4513590243421394e-05,
+      "loss": 0.1763,
+      "step": 1140
+    },
+    {
+      "epoch": 0.7814736842105263,
+      "grad_norm": 0.7573559880256653,
+      "learning_rate": 1.4313736620074389e-05,
+      "loss": 0.1825,
+      "step": 1160
+    },
+    {
+      "epoch": 0.7949473684210526,
+      "grad_norm": 0.6948429346084595,
+      "learning_rate": 1.4111743117633676e-05,
+      "loss": 0.1835,
+      "step": 1180
+    },
+    {
+      "epoch": 0.8084210526315789,
+      "grad_norm": 0.7930866479873657,
+      "learning_rate": 1.3907709937319451e-05,
+      "loss": 0.1852,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8084210526315789,
+      "eval_loss": 0.2097395956516266,
+      "eval_runtime": 169.5737,
+      "eval_samples_per_second": 14.743,
+      "eval_steps_per_second": 7.371,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8218947368421052,
+      "grad_norm": 0.6302672624588013,
+      "learning_rate": 1.3701738292157778e-05,
+      "loss": 0.1776,
+      "step": 1220
+    },
+    {
+      "epoch": 0.8353684210526315,
+      "grad_norm": 0.8323765397071838,
+      "learning_rate": 1.3493930356772705e-05,
+      "loss": 0.1793,
+      "step": 1240
+    },
+    {
+      "epoch": 0.8488421052631578,
+      "grad_norm": 0.6844717264175415,
+      "learning_rate": 1.328438921670134e-05,
+      "loss": 0.1652,
+      "step": 1260
+    },
+    {
+      "epoch": 0.8623157894736843,
+      "grad_norm": 0.8403880000114441,
+      "learning_rate": 1.3073218817257091e-05,
+      "loss": 0.174,
+      "step": 1280
+    },
+    {
+      "epoch": 0.8757894736842106,
+      "grad_norm": 0.7015041708946228,
+      "learning_rate": 1.2860523911966367e-05,
+      "loss": 0.164,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8892631578947369,
+      "grad_norm": 0.7717349529266357,
+      "learning_rate": 1.2646410010604397e-05,
+      "loss": 0.1738,
+      "step": 1320
+    },
+    {
+      "epoch": 0.9027368421052632,
+      "grad_norm": 0.7405645847320557,
+      "learning_rate": 1.2430983326855873e-05,
+      "loss": 0.1743,
+      "step": 1340
+    },
+    {
+      "epoch": 0.9162105263157895,
+      "grad_norm": 0.5770362019538879,
+      "learning_rate": 1.2214350725626412e-05,
+      "loss": 0.161,
+      "step": 1360
+    },
+    {
+      "epoch": 0.9296842105263158,
+      "grad_norm": 0.8527249693870544,
+      "learning_rate": 1.1996619670030996e-05,
+      "loss": 0.1672,
+      "step": 1380
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "grad_norm": 0.632085382938385,
+      "learning_rate": 1.177789816808563e-05,
+      "loss": 0.1621,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "eval_loss": 0.19134877622127533,
+      "eval_runtime": 171.2315,
+      "eval_samples_per_second": 14.6,
+      "eval_steps_per_second": 7.3,
+      "step": 1400
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 2970,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.5588642521088e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

qwen3-150m-finetuned-antonyms/checkpoint-1400/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a412f0daaffd5e2c26806635af04cb23c8d6303f45667ff9b72e81b4e25e433
+size 5304

qwen3-150m-finetuned-coherence/config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 2048,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pad_token_id": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 32000
+}

qwen3-150m-finetuned-coherence/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "eos_token_id": 3,
+  "pad_token_id": 1,
+  "transformers_version": "4.55.4"
+}

qwen3-150m-finetuned-coherence/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b8f382919990f42c759118cd048ed70d91dfb5c0f7bb5a4fa0f0a5d8af6de2b
+size 551392792

qwen3-150m-finetuned-coherence/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3-150m-finetuned-coherence/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3-150m-finetuned-coherence/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<en>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<te>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<mai>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": "<unk>"
+}

qwen3-150m-finetuned-coherence/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42ce8a14b90abc12d09ad1bff5f1e06a1650b59f2fc92456af0b5aee5b95345d
+size 5304

qwen3-150m-lora-finetune-logs-antonyms/events.out.tfevents.1757877473.Sai-Legion.21500.15 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5b1776bb810af8611770ee3158fd19e4764fefd59b74cc1220dc92a35b1c772
+size 58128

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: c:\Users\vsai2\Documents\LAMA\training\qwen3-150m-multilingual-checkpoints\checkpoint-26000
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:c:\Users\vsai2\Documents\LAMA\training\qwen3-150m-multilingual-checkpoints\checkpoint-26000
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app. -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "c:\\Users\\vsai2\\Documents\\LAMA\\training\\qwen3-150m-multilingual-checkpoints\\checkpoint-26000",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "o_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6d70ffbda4cefb54d9747218b65bc3d2a8d4695f8ab8206e08bf0d6fa061e6
+size 7090536

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:888aeae7a648bfea0613788b73882d9936bf0a85dd21badd638f36057524f6ed
+size 14236666

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6742e9aedbb18cd57e77c6a370ac184036662ffd5d7d8f51377ec9f414a9a705
+size 14244

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12fc30ca2c48d51e9e7e17fb238e1615ff4b754d3a52eb27c6eb0589d54d8fc4
+size 1064

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3-150m-lora-finetuned-antonyms/checkpoint-4200/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff