zeng981 committed on
Commit
e612c2f
·
verified ·
1 Parent(s): 429c65e

Upload 7 files

Browse files
eval_2024-07-17-00-34-20/all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_bleu-4": 78.874164847223,
3
+ "predict_rouge-1": 95.77926588261774,
4
+ "predict_rouge-2": 31.06746947273648,
5
+ "predict_rouge-l": 95.77926588261774,
6
+ "predict_runtime": 19261.7879,
7
+ "predict_samples_per_second": 0.923,
8
+ "predict_steps_per_second": 0.461
9
+ }
eval_2024-07-17-00-34-20/generated_predictions.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_2024-07-17-00-34-20/llamaboard_config.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eval.batch_size: 2
2
+ eval.cutoff_len: 1024
3
+ eval.dataset:
4
+ - test_data1
5
+ eval.dataset_dir: data
6
+ eval.max_new_tokens: 512
7
+ eval.max_samples: '100000'
8
+ eval.output_dir: eval_2024-07-17-00-34-20
9
+ eval.predict: true
10
+ eval.temperature: 0.95
11
+ eval.top_p: 0.7
12
+ top.booster: auto
13
+ top.checkpoint_path:
14
+ - train_2024-07-05-17-07-59
15
+ top.finetuning_type: lora
16
+ top.model_name: Qwen2-7B
17
+ top.quantization_bit: none
18
+ top.quantization_method: bitsandbytes
19
+ top.rope_scaling: none
20
+ top.template: default
21
+ top.visual_inputs: false
eval_2024-07-17-00-34-20/predict_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_bleu-4": 78.874164847223,
3
+ "predict_rouge-1": 95.77926588261774,
4
+ "predict_rouge-2": 31.06746947273648,
5
+ "predict_rouge-l": 95.77926588261774,
6
+ "predict_runtime": 19261.7879,
7
+ "predict_samples_per_second": 0.923,
8
+ "predict_steps_per_second": 0.461
9
+ }
eval_2024-07-17-00-34-20/running_log.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [INFO|parser.py:317] 2024-07-17 00:34:50,446 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
2
+
3
+ [INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,482 >> loading file vocab.json
4
+
5
+ [INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,482 >> loading file merges.txt
6
+
7
+ [INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,484 >> loading file tokenizer.json
8
+
9
+ [INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,484 >> loading file added_tokens.json
10
+
11
+ [INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,485 >> loading file special_tokens_map.json
12
+
13
+ [INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,485 >> loading file tokenizer_config.json
14
+
15
+ [WARNING|logging.py:313] 2024-07-17 00:34:50,771 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
16
+
17
+ [INFO|loader.py:50] 2024-07-17 00:34:50,772 >> Loading dataset test_data1.json...
18
+
19
+ [INFO|configuration_utils.py:731] 2024-07-17 00:34:51,772 >> loading configuration file D:\models\Qwen2-7B\config.json
20
+
21
+ [INFO|configuration_utils.py:800] 2024-07-17 00:34:51,774 >> Model config Qwen2Config {
22
+ "_name_or_path": "D:\\models\\Qwen2-7B",
23
+ "architectures": [
24
+ "Qwen2ForCausalLM"
25
+ ],
26
+ "attention_dropout": 0.0,
27
+ "bos_token_id": 151643,
28
+ "eos_token_id": 151643,
29
+ "hidden_act": "silu",
30
+ "hidden_size": 3584,
31
+ "initializer_range": 0.02,
32
+ "intermediate_size": 18944,
33
+ "max_position_embeddings": 131072,
34
+ "max_window_layers": 28,
35
+ "model_type": "qwen2",
36
+ "num_attention_heads": 28,
37
+ "num_hidden_layers": 28,
38
+ "num_key_value_heads": 4,
39
+ "rms_norm_eps": 1e-06,
40
+ "rope_theta": 1000000.0,
41
+ "sliding_window": 131072,
42
+ "tie_word_embeddings": false,
43
+ "torch_dtype": "bfloat16",
44
+ "transformers_version": "4.42.3",
45
+ "use_cache": true,
46
+ "use_sliding_window": false,
47
+ "vocab_size": 152064
48
+ }
49
+
50
+
51
+ [INFO|patcher.py:79] 2024-07-17 00:34:51,775 >> Using KV cache for faster generation.
52
+
53
+ [INFO|modeling_utils.py:3553] 2024-07-17 00:34:51,873 >> loading weights file D:\models\Qwen2-7B\model.safetensors.index.json
54
+
55
+ [INFO|modeling_utils.py:1531] 2024-07-17 00:34:51,874 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.
56
+
57
+ [INFO|configuration_utils.py:1000] 2024-07-17 00:34:51,876 >> Generate config GenerationConfig {
58
+ "bos_token_id": 151643,
59
+ "eos_token_id": 151643
60
+ }
61
+
62
+
63
+ [INFO|modeling_utils.py:4364] 2024-07-17 00:35:20,895 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.
64
+
65
+
66
+ [INFO|modeling_utils.py:4372] 2024-07-17 00:35:20,895 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at D:\models\Qwen2-7B.
67
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
68
+
69
+ [INFO|configuration_utils.py:953] 2024-07-17 00:35:20,899 >> loading configuration file D:\models\Qwen2-7B\generation_config.json
70
+
71
+ [INFO|configuration_utils.py:1000] 2024-07-17 00:35:20,900 >> Generate config GenerationConfig {
72
+ "bos_token_id": 151643,
73
+ "eos_token_id": 151643,
74
+ "max_new_tokens": 2048
75
+ }
76
+
77
+
78
+ [INFO|attention.py:80] 2024-07-17 00:35:21,646 >> Using torch SDPA for faster training and inference.
79
+
80
+ [INFO|adapter.py:195] 2024-07-17 00:35:22,672 >> Merged 1 adapter(s).
81
+
82
+ [INFO|adapter.py:203] 2024-07-17 00:35:22,673 >> Loaded adapter(s): saves\Qwen2-7B\lora\train_2024-07-05-17-07-59
83
+
84
+ [INFO|loader.py:196] 2024-07-17 00:35:22,679 >> all params: 7,615,616,512
85
+
86
+ [INFO|trainer.py:3788] 2024-07-17 00:35:22,749 >>
87
+ ***** Running Prediction *****
88
+
89
+ [INFO|trainer.py:3790] 2024-07-17 00:35:22,749 >> Num examples = 17771
90
+
91
+ [INFO|trainer.py:3793] 2024-07-17 00:35:22,750 >> Batch size = 2
92
+
93
+ [WARNING|logging.py:328] 2024-07-17 00:35:24,938 >> We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
94
+
95
+ [INFO|trainer.py:127] 2024-07-17 05:56:24,534 >> Saving prediction results to saves\Qwen2-7B\lora\eval_2024-07-17-00-34-20\generated_predictions.jsonl
96
+
eval_2024-07-17-00-34-20/trainer_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_2024-07-17-00-34-20/training_args.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adapter_name_or_path: saves\Qwen2-7B\lora\train_2024-07-05-17-07-59
2
+ cutoff_len: 1024
3
+ dataset: test_data1
4
+ dataset_dir: data
5
+ do_predict: true
6
+ finetuning_type: lora
7
+ flash_attn: auto
8
+ max_new_tokens: 512
9
+ max_samples: 100000
10
+ model_name_or_path: D:\models\Qwen2-7B
11
+ output_dir: saves\Qwen2-7B\lora\eval_2024-07-17-00-34-20
12
+ per_device_eval_batch_size: 2
13
+ predict_with_generate: true
14
+ preprocessing_num_workers: 16
15
+ quantization_method: bitsandbytes
16
+ stage: sft
17
+ temperature: 0.95
18
+ template: default
19
+ top_p: 0.7