Training in progress, step 4272

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,17 +1,17 @@
 ---
-base_model: EverAI-AI/gpt-oss-120b-mxfp4-r32-1001
 library_name: transformers
 model_name: training_output
 tags:
 - generated_from_trainer
-- sft
 - trl
 licence: license
 ---
 # Model Card for training_output
-This model is a fine-tuned version of [EverAI-AI/gpt-oss-120b-mxfp4-r32-1001](https://huggingface.co/EverAI-AI/gpt-oss-120b-mxfp4-r32-1001).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/lucas01/llmv3/runs/r1rzlb44)
 This model was trained with SFT.

 ---
+base_model: openai/gpt-oss-120b
 library_name: transformers
 model_name: training_output
 tags:
 - generated_from_trainer
 - trl
+- sft
 licence: license
 ---
 # Model Card for training_output
+This model is a fine-tuned version of [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/lucas01/llmv3/runs/dp6q56e4)
 This model was trained with SFT.

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "EverAI-AI/gpt-oss-120b-mxfp4-r32-1001",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
@@ -26,9 +26,9 @@
   "revision": null,
   "target_modules": [
     "k_proj",
-    "q_proj",
     "o_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "openai/gpt-oss-120b",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "revision": null,
   "target_modules": [
     "k_proj",
     "o_proj",
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99de00849e25cb20d53b8b1e65930bcae9c4cd869edc1772b8a8c872c85c54d2
 size 191141984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3634d85fa57df9c58a766b3c0428acc83994259af398cd63bdb62c59fe816af
 size 191141984

modelopt_state_train.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfb852ea0ed85b7b9caf5cd1a5e37a44dcb3d82fcb0686c02766a3b2dcb2d442
 size 975611

 version https://git-lfs.github.com/spec/v1
+oid sha256:74fc53932082694dccae0d3bf3bdf8d40547c68d4c28a10f095a8f1f6a97a59e
 size 975611

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a80156191390f8d671a3bfb83048d2db6cca303038074393ce63e6f0b6368b4
 size 6353

 version https://git-lfs.github.com/spec/v1
+oid sha256:7546d710638c25dad821dceaa8664fb4a76ad6c5670f14d9154e55607fc519e9
 size 6353