baloglu321/gemma-2-2B-it-thinking-function_calling-V0

Files changed (5)

README.md CHANGED

@@ -34,11 +34,11 @@ This model was trained with SFT.
 ### Framework versions
-- TRL: 0.15.1
-- Transformers: 4.48.3
-- Pytorch: 2.5.1+cu124
-- Datasets: 3.3.2
-- Tokenizers: 0.21.0
 ## Citations
@@ -49,7 +49,7 @@ Cite TRL as:
 ```bibtex
 @misc{vonwerra2022trl,
 	title        = {{TRL: Transformer Reinforcement Learning}},
-	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
 	year         = 2020,
 	journal      = {GitHub repository},
 	publisher    = {GitHub},

 ### Framework versions
+- TRL: 0.17.0
+- Transformers: 4.51.3
+- Pytorch: 2.6.0+cu124
+- Datasets: 3.6.0
+- Tokenizers: 0.21.1
 ## Citations
 ```bibtex
 @misc{vonwerra2022trl,
 	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
 	year         = 2020,
 	journal      = {GitHub repository},
 	publisher    = {GitHub},

adapter_config.json CHANGED

@@ -3,6 +3,7 @@
   "auto_mapping": null,
   "base_model_name_or_path": "google/gemma-2-2b-it",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -23,17 +24,18 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "v_proj",
     "down_proj",
-    "gate_proj",
     "embed_tokens",
-    "o_proj",
-    "q_proj",
-    "lm_head",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false
 }

   "auto_mapping": null,
   "base_model_name_or_path": "google/gemma-2-2b-it",
   "bias": "none",
+  "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "lm_head",
+    "q_proj",
     "down_proj",
+    "k_proj",
     "embed_tokens",
+    "gate_proj",
+    "v_proj",
+    "up_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ac146acd21f54248f045ac12d50e4f1265afeb00901719a4740bcb1c2fc8e68
 size 2475549872

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3bdf32ea49d0e5a1c9152d96734629f75611c8a9c95d07067aa94572eb8ea5a
 size 2475549872

runs/May11_12-41-58_4dd80c48a4c5/events.out.tfevents.1746967345.4dd80c48a4c5.1808.0 ADDED

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fe3c0968162b6b1e59ff1a06323c18e2d5997fd75ce83f6ff087f2e0a7b452f
+size 41400

training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95485e8c267905da6178e167d719e7e47b4ce3c451d64b4e38b719a8ea4e08d8
-size 5624

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae19ba492176177f25b68ab411b8e2ca1ec69526ad23ca1df12e5faaeb36a253
+size 5752