gsmyrnis committed on
Commit
e3e4ce2
·
verified ·
1 Parent(s): 6eafe8b

Upload model

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: Qwen/Qwen2.5-7B-Instruct
5
+ tags:
6
+ - llama-factory
7
+ - full
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: am_300k
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # am_300k
18
+
19
+ This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mlfoundations-dev/am_300k dataset.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 8e-05
39
+ - train_batch_size: 1
40
+ - eval_batch_size: 8
41
+ - seed: 42
42
+ - distributed_type: multi-GPU
43
+ - num_devices: 16
44
+ - gradient_accumulation_steps: 32
45
+ - total_train_batch_size: 512
46
+ - total_eval_batch_size: 128
47
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
48
+ - lr_scheduler_type: cosine
49
+ - lr_scheduler_warmup_ratio: 0.1
50
+ - num_epochs: 5.0
51
+
52
+ ### Training results
53
+
54
+
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.46.1
59
+ - PyTorch 2.5.0a0+b465a5843b.nv24.09
60
+ - Datasets 3.5.0
61
+ - Tokenizers 0.20.3
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.989094874591058,
3
+ "total_flos": 1.517527475894721e+19,
4
+ "train_loss": 0.18812398983721149,
5
+ "train_runtime": 53023.3041,
6
+ "train_samples_per_second": 5.533,
7
+ "train_steps_per_second": 0.011
8
+ }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "/p/data1/mmlaion/dcft/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/a09a35458c702b33eeacc393d103063234e8bc28",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.46.1"
14
+ }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eda6cabeba77cc43015a179bfac36fa3175635f9f551ddd302e455d07b3b7765
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b725862e1a49961467e9500eb9ad0869a5e391d7951c76b0fe6a7e0eac6ff3d4
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22e5a4925d18fcfae863de4f54e62d07a0027ae113753408e73f645998014083
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3dd50ff5510d4bc0a08a8086e09258a270d3ba53387afb2a52374131d279dd
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0091c17eeaef1ec3c2fb76a9933c6451c3aa8332a414073dd47fe2b336741f17
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aead2f630164a16566219369a66e47c3298a31133c013b7402b9988e51aca05
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06eb52757de476425798c5bd5f9ae9a1b26fde8a4bc6ebcefa5e84ef9c77e187
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254ac2adea5d8c42bb4e1e4eb9d3b11abe5260317d0f7662f96b366314d4e69a
3
  size 1089994880
runs/May09_23-11-32_jrc0929/events.out.tfevents.1746825268.jrc0929.2178476.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbfe850ca34492952eb15ecb5fdd6dcfa41d3008d68bcf94e460368ac0f6509
3
+ size 5664
runs/May09_23-26-27_jrc0932/events.out.tfevents.1746826152.jrc0932.2137577.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff9c0cf8d1269c6731c7af55e9696f8bf17b67e8801f16c707e6f0aea163a13
3
+ size 82808
runs/May10_23-29-46_jrc0936/events.out.tfevents.1746912747.jrc0936.2314569.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0980a53972ac0af48fa6dd6d9d97db249aabd01a4743c1babce38ad84dc490a7
3
+ size 53762
start_end.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"start_time": "2025-05-10 23:29:46", "end_time": "2025-05-11 14:16:22"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.989094874591058,
3
+ "total_flos": 1.517527475894721e+19,
4
+ "train_loss": 0.18812398983721149,
5
+ "train_runtime": 53023.3041,
6
+ "train_samples_per_second": 5.533,
7
+ "train_steps_per_second": 0.011
8
+ }
trainer_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e27c7eeccc0a1d9ead0facd6dd9466bfb4d9868c32eaa398bbcd8bf9fce147f5
3
- size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e7c90be2d76bc5ff25e2f26216103892088d22325c079224b6a0eea8e2deea
3
+ size 7352
training_loss.png ADDED