zeng981 committed on
Commit
64fa4f1
·
verified ·
1 Parent(s): 3c382a5

Upload 7 files

Browse files
eval_2024-07-21-20-18-07/all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_bleu-4": 79.14533431433236,
3
+ "predict_rouge-1": 96.22080920600978,
4
+ "predict_rouge-2": 31.089978054133137,
5
+ "predict_rouge-l": 96.22080920600978,
6
+ "predict_runtime": 13302.1594,
7
+ "predict_samples_per_second": 1.336,
8
+ "predict_steps_per_second": 0.668
9
+ }
eval_2024-07-21-20-18-07/generated_predictions.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_2024-07-21-20-18-07/llamaboard_config.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eval.batch_size: 2
2
+ eval.cutoff_len: 1024
3
+ eval.dataset:
4
+ - test_data1
5
+ eval.dataset_dir: data
6
+ eval.max_new_tokens: 512
7
+ eval.max_samples: '100000'
8
+ eval.output_dir: eval_2024-07-21-20-18-07
9
+ eval.predict: true
10
+ eval.temperature: 0.95
11
+ eval.top_p: 0.7
12
+ top.booster: auto
13
+ top.checkpoint_path:
14
+ - train_2024-07-21-20-18-07
15
+ top.finetuning_type: lora
16
+ top.model_name: Gemma-2-9B
17
+ top.quantization_bit: '4'
18
+ top.quantization_method: bitsandbytes
19
+ top.rope_scaling: none
20
+ top.template: default
21
+ top.visual_inputs: false
eval_2024-07-21-20-18-07/predict_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_bleu-4": 79.14533431433236,
3
+ "predict_rouge-1": 96.22080920600978,
4
+ "predict_rouge-2": 31.089978054133137,
5
+ "predict_rouge-l": 96.22080920600978,
6
+ "predict_runtime": 13302.1594,
7
+ "predict_samples_per_second": 1.336,
8
+ "predict_steps_per_second": 0.668
9
+ }
eval_2024-07-21-20-18-07/running_log.txt ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [WARNING|parser.py:255] 2024-07-31 20:33:17,976 >> Evaluating model in 4/8-bit mode may cause lower scores.
2
+
3
+ [INFO|parser.py:317] 2024-07-31 20:33:17,978 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
4
+
5
+ [INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,076 >> loading file tokenizer.model
6
+
7
+ [INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,076 >> loading file tokenizer.json
8
+
9
+ [INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,077 >> loading file added_tokens.json
10
+
11
+ [INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,078 >> loading file special_tokens_map.json
12
+
13
+ [INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,078 >> loading file tokenizer_config.json
14
+
15
+ [INFO|loader.py:50] 2024-07-31 20:33:18,960 >> Loading dataset test_data1.json...
16
+
17
+ [INFO|configuration_utils.py:731] 2024-07-31 20:33:42,848 >> loading configuration file D:\models\gemma-2-9b\config.json
18
+
19
+ [INFO|configuration_utils.py:800] 2024-07-31 20:33:42,851 >> Model config Gemma2Config {
20
+ "_name_or_path": "D:\\models\\gemma-2-9b",
21
+ "architectures": [
22
+ "Gemma2ForCausalLM"
23
+ ],
24
+ "attention_bias": false,
25
+ "attention_dropout": 0.0,
26
+ "attn_logit_softcapping": 50.0,
27
+ "bos_token_id": 2,
28
+ "cache_implementation": "hybrid",
29
+ "eos_token_id": 1,
30
+ "final_logit_softcapping": 30.0,
31
+ "head_dim": 256,
32
+ "hidden_act": "gelu_pytorch_tanh",
33
+ "hidden_activation": "gelu_pytorch_tanh",
34
+ "hidden_size": 3584,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 14336,
37
+ "max_position_embeddings": 8192,
38
+ "model_type": "gemma2",
39
+ "num_attention_heads": 16,
40
+ "num_hidden_layers": 42,
41
+ "num_key_value_heads": 8,
42
+ "pad_token_id": 0,
43
+ "query_pre_attn_scalar": 224,
44
+ "rms_norm_eps": 1e-06,
45
+ "rope_theta": 10000.0,
46
+ "sliding_window": 4096,
47
+ "sliding_window_size": 4096,
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.42.3",
50
+ "use_cache": true,
51
+ "vocab_size": 256000
52
+ }
53
+
54
+
55
+ [INFO|quantization.py:183] 2024-07-31 20:33:42,854 >> Quantizing model to 4 bit with bitsandbytes.
56
+
57
+ [INFO|patcher.py:79] 2024-07-31 20:33:42,854 >> Using KV cache for faster generation.
58
+
59
+ [INFO|modeling_utils.py:3553] 2024-07-31 20:33:42,951 >> loading weights file D:\models\gemma-2-9b\model.safetensors.index.json
60
+
61
+ [INFO|modeling_utils.py:1531] 2024-07-31 20:33:42,960 >> Instantiating Gemma2ForCausalLM model under default dtype torch.float16.
62
+
63
+ [INFO|configuration_utils.py:1000] 2024-07-31 20:33:42,961 >> Generate config GenerationConfig {
64
+ "bos_token_id": 2,
65
+ "cache_implementation": "hybrid",
66
+ "eos_token_id": 1,
67
+ "pad_token_id": 0
68
+ }
69
+
70
+
71
+ [INFO|modeling_utils.py:4364] 2024-07-31 20:34:18,478 >> All model checkpoint weights were used when initializing Gemma2ForCausalLM.
72
+
73
+
74
+ [INFO|modeling_utils.py:4372] 2024-07-31 20:34:18,478 >> All the weights of Gemma2ForCausalLM were initialized from the model checkpoint at D:\models\gemma-2-9b.
75
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use Gemma2ForCausalLM for predictions without further training.
76
+
77
+ [INFO|configuration_utils.py:953] 2024-07-31 20:34:18,490 >> loading configuration file D:\models\gemma-2-9b\generation_config.json
78
+
79
+ [INFO|configuration_utils.py:1000] 2024-07-31 20:34:18,491 >> Generate config GenerationConfig {
80
+ "bos_token_id": 2,
81
+ "cache_implementation": "hybrid",
82
+ "eos_token_id": 1,
83
+ "pad_token_id": 0
84
+ }
85
+
86
+
87
+ [INFO|attention.py:80] 2024-07-31 20:34:18,806 >> Using torch SDPA for faster training and inference.
88
+
89
+ [INFO|adapter.py:203] 2024-07-31 20:34:19,915 >> Loaded adapter(s): saves\Gemma-2-9B\lora\train_2024-07-21-20-18-07
90
+
91
+ [INFO|loader.py:196] 2024-07-31 20:34:19,959 >> all params: 9,268,715,008
92
+
93
+ [INFO|trainer.py:3788] 2024-07-31 20:34:20,030 >>
94
+ ***** Running Prediction *****
95
+
96
+ [INFO|trainer.py:3790] 2024-07-31 20:34:20,030 >> Num examples = 17771
97
+
98
+ [INFO|trainer.py:3793] 2024-07-31 20:34:20,031 >> Batch size = 2
99
+
100
+ [INFO|trainer.py:127] 2024-08-01 00:16:02,179 >> Saving prediction results to saves\Gemma-2-9B\lora\eval_2024-07-21-20-18-07\generated_predictions.jsonl
101
+
eval_2024-07-21-20-18-07/trainer_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_2024-07-21-20-18-07/training_args.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adapter_name_or_path: saves\Gemma-2-9B\lora\train_2024-07-21-20-18-07
2
+ cutoff_len: 1024
3
+ dataset: test_data1
4
+ dataset_dir: data
5
+ do_predict: true
6
+ finetuning_type: lora
7
+ flash_attn: auto
8
+ max_new_tokens: 512
9
+ max_samples: 100000
10
+ model_name_or_path: D:\models\gemma-2-9b
11
+ output_dir: saves\Gemma-2-9B\lora\eval_2024-07-21-20-18-07
12
+ per_device_eval_batch_size: 2
13
+ predict_with_generate: true
14
+ preprocessing_num_workers: 16
15
+ quantization_bit: 4
16
+ quantization_method: bitsandbytes
17
+ stage: sft
18
+ temperature: 0.95
19
+ template: default
20
+ top_p: 0.7