Upload 7 files
Browse files- eval_2024-07-21-20-18-07/all_results.json +9 -0
- eval_2024-07-21-20-18-07/generated_predictions.jsonl +0 -0
- eval_2024-07-21-20-18-07/llamaboard_config.yaml +21 -0
- eval_2024-07-21-20-18-07/predict_results.json +9 -0
- eval_2024-07-21-20-18-07/running_log.txt +101 -0
- eval_2024-07-21-20-18-07/trainer_log.jsonl +0 -0
- eval_2024-07-21-20-18-07/training_args.yaml +20 -0
eval_2024-07-21-20-18-07/all_results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"predict_bleu-4": 79.14533431433236,
|
| 3 |
+
"predict_rouge-1": 96.22080920600978,
|
| 4 |
+
"predict_rouge-2": 31.089978054133137,
|
| 5 |
+
"predict_rouge-l": 96.22080920600978,
|
| 6 |
+
"predict_runtime": 13302.1594,
|
| 7 |
+
"predict_samples_per_second": 1.336,
|
| 8 |
+
"predict_steps_per_second": 0.668
|
| 9 |
+
}
|
eval_2024-07-21-20-18-07/generated_predictions.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval_2024-07-21-20-18-07/llamaboard_config.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
eval.batch_size: 2
|
| 2 |
+
eval.cutoff_len: 1024
|
| 3 |
+
eval.dataset:
|
| 4 |
+
- test_data1
|
| 5 |
+
eval.dataset_dir: data
|
| 6 |
+
eval.max_new_tokens: 512
|
| 7 |
+
eval.max_samples: '100000'
|
| 8 |
+
eval.output_dir: eval_2024-07-21-20-18-07
|
| 9 |
+
eval.predict: true
|
| 10 |
+
eval.temperature: 0.95
|
| 11 |
+
eval.top_p: 0.7
|
| 12 |
+
top.booster: auto
|
| 13 |
+
top.checkpoint_path:
|
| 14 |
+
- train_2024-07-21-20-18-07
|
| 15 |
+
top.finetuning_type: lora
|
| 16 |
+
top.model_name: Gemma-2-9B
|
| 17 |
+
top.quantization_bit: '4'
|
| 18 |
+
top.quantization_method: bitsandbytes
|
| 19 |
+
top.rope_scaling: none
|
| 20 |
+
top.template: default
|
| 21 |
+
top.visual_inputs: false
|
eval_2024-07-21-20-18-07/predict_results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"predict_bleu-4": 79.14533431433236,
|
| 3 |
+
"predict_rouge-1": 96.22080920600978,
|
| 4 |
+
"predict_rouge-2": 31.089978054133137,
|
| 5 |
+
"predict_rouge-l": 96.22080920600978,
|
| 6 |
+
"predict_runtime": 13302.1594,
|
| 7 |
+
"predict_samples_per_second": 1.336,
|
| 8 |
+
"predict_steps_per_second": 0.668
|
| 9 |
+
}
|
eval_2024-07-21-20-18-07/running_log.txt
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[WARNING|parser.py:255] 2024-07-31 20:33:17,976 >> Evaluating model in 4/8-bit mode may cause lower scores.
|
| 2 |
+
|
| 3 |
+
[INFO|parser.py:317] 2024-07-31 20:33:17,978 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
|
| 4 |
+
|
| 5 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,076 >> loading file tokenizer.model
|
| 6 |
+
|
| 7 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,076 >> loading file tokenizer.json
|
| 8 |
+
|
| 9 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,077 >> loading file added_tokens.json
|
| 10 |
+
|
| 11 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,078 >> loading file special_tokens_map.json
|
| 12 |
+
|
| 13 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,078 >> loading file tokenizer_config.json
|
| 14 |
+
|
| 15 |
+
[INFO|loader.py:50] 2024-07-31 20:33:18,960 >> Loading dataset test_data1.json...
|
| 16 |
+
|
| 17 |
+
[INFO|configuration_utils.py:731] 2024-07-31 20:33:42,848 >> loading configuration file D:\models\gemma-2-9b\config.json
|
| 18 |
+
|
| 19 |
+
[INFO|configuration_utils.py:800] 2024-07-31 20:33:42,851 >> Model config Gemma2Config {
|
| 20 |
+
"_name_or_path": "D:\\models\\gemma-2-9b",
|
| 21 |
+
"architectures": [
|
| 22 |
+
"Gemma2ForCausalLM"
|
| 23 |
+
],
|
| 24 |
+
"attention_bias": false,
|
| 25 |
+
"attention_dropout": 0.0,
|
| 26 |
+
"attn_logit_softcapping": 50.0,
|
| 27 |
+
"bos_token_id": 2,
|
| 28 |
+
"cache_implementation": "hybrid",
|
| 29 |
+
"eos_token_id": 1,
|
| 30 |
+
"final_logit_softcapping": 30.0,
|
| 31 |
+
"head_dim": 256,
|
| 32 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 33 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 34 |
+
"hidden_size": 3584,
|
| 35 |
+
"initializer_range": 0.02,
|
| 36 |
+
"intermediate_size": 14336,
|
| 37 |
+
"max_position_embeddings": 8192,
|
| 38 |
+
"model_type": "gemma2",
|
| 39 |
+
"num_attention_heads": 16,
|
| 40 |
+
"num_hidden_layers": 42,
|
| 41 |
+
"num_key_value_heads": 8,
|
| 42 |
+
"pad_token_id": 0,
|
| 43 |
+
"query_pre_attn_scalar": 224,
|
| 44 |
+
"rms_norm_eps": 1e-06,
|
| 45 |
+
"rope_theta": 10000.0,
|
| 46 |
+
"sliding_window": 4096,
|
| 47 |
+
"sliding_window_size": 4096,
|
| 48 |
+
"torch_dtype": "float32",
|
| 49 |
+
"transformers_version": "4.42.3",
|
| 50 |
+
"use_cache": true,
|
| 51 |
+
"vocab_size": 256000
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
[INFO|quantization.py:183] 2024-07-31 20:33:42,854 >> Quantizing model to 4 bit with bitsandbytes.
|
| 56 |
+
|
| 57 |
+
[INFO|patcher.py:79] 2024-07-31 20:33:42,854 >> Using KV cache for faster generation.
|
| 58 |
+
|
| 59 |
+
[INFO|modeling_utils.py:3553] 2024-07-31 20:33:42,951 >> loading weights file D:\models\gemma-2-9b\model.safetensors.index.json
|
| 60 |
+
|
| 61 |
+
[INFO|modeling_utils.py:1531] 2024-07-31 20:33:42,960 >> Instantiating Gemma2ForCausalLM model under default dtype torch.float16.
|
| 62 |
+
|
| 63 |
+
[INFO|configuration_utils.py:1000] 2024-07-31 20:33:42,961 >> Generate config GenerationConfig {
|
| 64 |
+
"bos_token_id": 2,
|
| 65 |
+
"cache_implementation": "hybrid",
|
| 66 |
+
"eos_token_id": 1,
|
| 67 |
+
"pad_token_id": 0
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
[INFO|modeling_utils.py:4364] 2024-07-31 20:34:18,478 >> All model checkpoint weights were used when initializing Gemma2ForCausalLM.
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
[INFO|modeling_utils.py:4372] 2024-07-31 20:34:18,478 >> All the weights of Gemma2ForCausalLM were initialized from the model checkpoint at D:\models\gemma-2-9b.
|
| 75 |
+
If your task is similar to the task the model of the checkpoint was trained on, you can already use Gemma2ForCausalLM for predictions without further training.
|
| 76 |
+
|
| 77 |
+
[INFO|configuration_utils.py:953] 2024-07-31 20:34:18,490 >> loading configuration file D:\models\gemma-2-9b\generation_config.json
|
| 78 |
+
|
| 79 |
+
[INFO|configuration_utils.py:1000] 2024-07-31 20:34:18,491 >> Generate config GenerationConfig {
|
| 80 |
+
"bos_token_id": 2,
|
| 81 |
+
"cache_implementation": "hybrid",
|
| 82 |
+
"eos_token_id": 1,
|
| 83 |
+
"pad_token_id": 0
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
[INFO|attention.py:80] 2024-07-31 20:34:18,806 >> Using torch SDPA for faster training and inference.
|
| 88 |
+
|
| 89 |
+
[INFO|adapter.py:203] 2024-07-31 20:34:19,915 >> Loaded adapter(s): saves\Gemma-2-9B\lora\train_2024-07-21-20-18-07
|
| 90 |
+
|
| 91 |
+
[INFO|loader.py:196] 2024-07-31 20:34:19,959 >> all params: 9,268,715,008
|
| 92 |
+
|
| 93 |
+
[INFO|trainer.py:3788] 2024-07-31 20:34:20,030 >>
|
| 94 |
+
***** Running Prediction *****
|
| 95 |
+
|
| 96 |
+
[INFO|trainer.py:3790] 2024-07-31 20:34:20,030 >> Num examples = 17771
|
| 97 |
+
|
| 98 |
+
[INFO|trainer.py:3793] 2024-07-31 20:34:20,031 >> Batch size = 2
|
| 99 |
+
|
| 100 |
+
[INFO|trainer.py:127] 2024-08-01 00:16:02,179 >> Saving prediction results to saves\Gemma-2-9B\lora\eval_2024-07-21-20-18-07\generated_predictions.jsonl
|
| 101 |
+
|
eval_2024-07-21-20-18-07/trainer_log.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval_2024-07-21-20-18-07/training_args.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adapter_name_or_path: saves\Gemma-2-9B\lora\train_2024-07-21-20-18-07
|
| 2 |
+
cutoff_len: 1024
|
| 3 |
+
dataset: test_data1
|
| 4 |
+
dataset_dir: data
|
| 5 |
+
do_predict: true
|
| 6 |
+
finetuning_type: lora
|
| 7 |
+
flash_attn: auto
|
| 8 |
+
max_new_tokens: 512
|
| 9 |
+
max_samples: 100000
|
| 10 |
+
model_name_or_path: D:\models\gemma-2-9b
|
| 11 |
+
output_dir: saves\Gemma-2-9B\lora\eval_2024-07-21-20-18-07
|
| 12 |
+
per_device_eval_batch_size: 2
|
| 13 |
+
predict_with_generate: true
|
| 14 |
+
preprocessing_num_workers: 16
|
| 15 |
+
quantization_bit: 4
|
| 16 |
+
quantization_method: bitsandbytes
|
| 17 |
+
stage: sft
|
| 18 |
+
temperature: 0.95
|
| 19 |
+
template: default
|
| 20 |
+
top_p: 0.7
|