Training in progress, step 40
Browse files
- README.md +12 -13
- config.json +13 -5
- generation_config.json +1 -1
- model.safetensors +1 -1
- runs/Feb28_08-00-10_742caaa5b789/events.out.tfevents.1772265610.742caaa5b789.391.0 +3 -0
- tokenizer.json +2 -2
- tokenizer_config.json +0 -0
- training_args.bin +2 -2
README.md
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
---
|
| 2 |
-
base_model: google/functiongemma-270m-it
|
| 3 |
library_name: transformers
|
| 4 |
model_name: functiongemma-270m-it-simple-tool-calling
|
| 5 |
tags:
|
|
@@ -11,7 +10,7 @@ licence: license
|
|
| 11 |
|
| 12 |
# Model Card for functiongemma-270m-it-simple-tool-calling
|
| 13 |
|
| 14 |
-
This model is a fine-tuned version of [google/functiongemma-270m-it](https://huggingface.co/google/functiongemma-270m-it).
|
| 15 |
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 16 |
|
| 17 |
## Quick start
|
|
@@ -30,15 +29,16 @@ print(output["generated_text"])
|
|
| 30 |
|
| 31 |
|
| 32 |
|
|
|
|
| 33 |
This model was trained with SFT.
|
| 34 |
|
| 35 |
### Framework versions
|
| 36 |
|
| 37 |
-
- TRL: 0.
|
| 38 |
-
- Transformers:
|
| 39 |
-
- Pytorch: 2.
|
| 40 |
- Datasets: 4.0.0
|
| 41 |
-
- Tokenizers: 0.22.
|
| 42 |
|
| 43 |
## Citations
|
| 44 |
|
|
@@ -47,12 +47,11 @@ This model was trained with SFT.
|
|
| 47 |
Cite TRL as:
|
| 48 |
|
| 49 |
```bibtex
|
| 50 |
-
@
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 57 |
}
|
| 58 |
```
|
|
|
|
| 1 |
---
|
|
|
|
| 2 |
library_name: transformers
|
| 3 |
model_name: functiongemma-270m-it-simple-tool-calling
|
| 4 |
tags:
|
|
|
|
| 10 |
|
| 11 |
# Model Card for functiongemma-270m-it-simple-tool-calling
|
| 12 |
|
| 13 |
+
This model is a fine-tuned version of [None](https://huggingface.co/None).
|
| 14 |
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 15 |
|
| 16 |
## Quick start
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
|
| 32 |
+
|
| 33 |
This model was trained with SFT.
|
| 34 |
|
| 35 |
### Framework versions
|
| 36 |
|
| 37 |
+
- TRL: 0.29.0
|
| 38 |
+
- Transformers: 5.0.0
|
| 39 |
+
- Pytorch: 2.10.0+cu128
|
| 40 |
- Datasets: 4.0.0
|
| 41 |
+
- Tokenizers: 0.22.2
|
| 42 |
|
| 43 |
## Citations
|
| 44 |
|
|
|
|
| 47 |
Cite TRL as:
|
| 48 |
|
| 49 |
```bibtex
|
| 50 |
+
@software{vonwerra2020trl,
|
| 51 |
+
title = {{TRL: Transformers Reinforcement Learning}},
|
| 52 |
+
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
|
| 53 |
+
license = {Apache-2.0},
|
| 54 |
+
url = {https://github.com/huggingface/trl},
|
| 55 |
+
year = {2020}
|
|
|
|
| 56 |
}
|
| 57 |
```
|
config.json
CHANGED
|
@@ -43,12 +43,20 @@
|
|
| 43 |
"pad_token_id": 0,
|
| 44 |
"query_pre_attn_scalar": 256,
|
| 45 |
"rms_norm_eps": 1e-06,
|
| 46 |
-
"
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
"sliding_window": 512,
|
| 50 |
-
"
|
|
|
|
| 51 |
"use_bidirectional_attention": false,
|
| 52 |
-
"use_cache":
|
| 53 |
"vocab_size": 262144
|
| 54 |
}
|
|
|
|
| 43 |
"pad_token_id": 0,
|
| 44 |
"query_pre_attn_scalar": 256,
|
| 45 |
"rms_norm_eps": 1e-06,
|
| 46 |
+
"rope_parameters": {
|
| 47 |
+
"full_attention": {
|
| 48 |
+
"rope_theta": 1000000.0,
|
| 49 |
+
"rope_type": "default"
|
| 50 |
+
},
|
| 51 |
+
"sliding_attention": {
|
| 52 |
+
"rope_theta": 10000.0,
|
| 53 |
+
"rope_type": "default"
|
| 54 |
+
}
|
| 55 |
+
},
|
| 56 |
"sliding_window": 512,
|
| 57 |
+
"tie_word_embeddings": true,
|
| 58 |
+
"transformers_version": "5.0.0",
|
| 59 |
"use_bidirectional_attention": false,
|
| 60 |
+
"use_cache": false,
|
| 61 |
"vocab_size": 262144
|
| 62 |
}
|
generation_config.json
CHANGED
|
@@ -10,5 +10,5 @@
|
|
| 10 |
"pad_token_id": 0,
|
| 11 |
"top_k": 64,
|
| 12 |
"top_p": 0.95,
|
| 13 |
-
"transformers_version": "
|
| 14 |
}
|
|
|
|
| 10 |
"pad_token_id": 0,
|
| 11 |
"top_k": 64,
|
| 12 |
"top_p": 0.95,
|
| 13 |
+
"transformers_version": "5.0.0"
|
| 14 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 536223056
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b72823a73feceb25868da1db7a6aa1b2e7caaa457a303daf1d12f90a9c64c6e3
|
| 3 |
size 536223056
|
runs/Feb28_08-00-10_742caaa5b789/events.out.tfevents.1772265610.742caaa5b789.391.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc603ff5934ad197fabf6f77cd397635757179a076b6ddc679b47618af441e16
|
| 3 |
+
size 24476
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3655797f9d732b7dc08b4225200697af8e37d94b74711d9b1d8166feb953578
|
| 3 |
+
size 33384774
|
tokenizer_config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7e62cfb706287057356d240fea739c2aed3842d9777a9d44f5ae25716b92617
|
| 3 |
+
size 5649
|