Upload 7 files
Browse files- eval_2024-07-17-00-34-20/all_results.json +9 -0
- eval_2024-07-17-00-34-20/generated_predictions.jsonl +0 -0
- eval_2024-07-17-00-34-20/llamaboard_config.yaml +21 -0
- eval_2024-07-17-00-34-20/predict_results.json +9 -0
- eval_2024-07-17-00-34-20/running_log.txt +96 -0
- eval_2024-07-17-00-34-20/trainer_log.jsonl +0 -0
- eval_2024-07-17-00-34-20/training_args.yaml +19 -0
eval_2024-07-17-00-34-20/all_results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"predict_bleu-4": 78.874164847223,
|
| 3 |
+
"predict_rouge-1": 95.77926588261774,
|
| 4 |
+
"predict_rouge-2": 31.06746947273648,
|
| 5 |
+
"predict_rouge-l": 95.77926588261774,
|
| 6 |
+
"predict_runtime": 19261.7879,
|
| 7 |
+
"predict_samples_per_second": 0.923,
|
| 8 |
+
"predict_steps_per_second": 0.461
|
| 9 |
+
}
|
eval_2024-07-17-00-34-20/generated_predictions.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval_2024-07-17-00-34-20/llamaboard_config.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
eval.batch_size: 2
|
| 2 |
+
eval.cutoff_len: 1024
|
| 3 |
+
eval.dataset:
|
| 4 |
+
- test_data1
|
| 5 |
+
eval.dataset_dir: data
|
| 6 |
+
eval.max_new_tokens: 512
|
| 7 |
+
eval.max_samples: '100000'
|
| 8 |
+
eval.output_dir: eval_2024-07-17-00-34-20
|
| 9 |
+
eval.predict: true
|
| 10 |
+
eval.temperature: 0.95
|
| 11 |
+
eval.top_p: 0.7
|
| 12 |
+
top.booster: auto
|
| 13 |
+
top.checkpoint_path:
|
| 14 |
+
- train_2024-07-05-17-07-59
|
| 15 |
+
top.finetuning_type: lora
|
| 16 |
+
top.model_name: Qwen2-7B
|
| 17 |
+
top.quantization_bit: none
|
| 18 |
+
top.quantization_method: bitsandbytes
|
| 19 |
+
top.rope_scaling: none
|
| 20 |
+
top.template: default
|
| 21 |
+
top.visual_inputs: false
|
eval_2024-07-17-00-34-20/predict_results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"predict_bleu-4": 78.874164847223,
|
| 3 |
+
"predict_rouge-1": 95.77926588261774,
|
| 4 |
+
"predict_rouge-2": 31.06746947273648,
|
| 5 |
+
"predict_rouge-l": 95.77926588261774,
|
| 6 |
+
"predict_runtime": 19261.7879,
|
| 7 |
+
"predict_samples_per_second": 0.923,
|
| 8 |
+
"predict_steps_per_second": 0.461
|
| 9 |
+
}
|
eval_2024-07-17-00-34-20/running_log.txt
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[INFO|parser.py:317] 2024-07-17 00:34:50,446 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
|
| 2 |
+
|
| 3 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,482 >> loading file vocab.json
|
| 4 |
+
|
| 5 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,482 >> loading file merges.txt
|
| 6 |
+
|
| 7 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,484 >> loading file tokenizer.json
|
| 8 |
+
|
| 9 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,484 >> loading file added_tokens.json
|
| 10 |
+
|
| 11 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,485 >> loading file special_tokens_map.json
|
| 12 |
+
|
| 13 |
+
[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,485 >> loading file tokenizer_config.json
|
| 14 |
+
|
| 15 |
+
[WARNING|logging.py:313] 2024-07-17 00:34:50,771 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
| 16 |
+
|
| 17 |
+
[INFO|loader.py:50] 2024-07-17 00:34:50,772 >> Loading dataset test_data1.json...
|
| 18 |
+
|
| 19 |
+
[INFO|configuration_utils.py:731] 2024-07-17 00:34:51,772 >> loading configuration file D:\models\Qwen2-7B\config.json
|
| 20 |
+
|
| 21 |
+
[INFO|configuration_utils.py:800] 2024-07-17 00:34:51,774 >> Model config Qwen2Config {
|
| 22 |
+
"_name_or_path": "D:\\models\\Qwen2-7B",
|
| 23 |
+
"architectures": [
|
| 24 |
+
"Qwen2ForCausalLM"
|
| 25 |
+
],
|
| 26 |
+
"attention_dropout": 0.0,
|
| 27 |
+
"bos_token_id": 151643,
|
| 28 |
+
"eos_token_id": 151643,
|
| 29 |
+
"hidden_act": "silu",
|
| 30 |
+
"hidden_size": 3584,
|
| 31 |
+
"initializer_range": 0.02,
|
| 32 |
+
"intermediate_size": 18944,
|
| 33 |
+
"max_position_embeddings": 131072,
|
| 34 |
+
"max_window_layers": 28,
|
| 35 |
+
"model_type": "qwen2",
|
| 36 |
+
"num_attention_heads": 28,
|
| 37 |
+
"num_hidden_layers": 28,
|
| 38 |
+
"num_key_value_heads": 4,
|
| 39 |
+
"rms_norm_eps": 1e-06,
|
| 40 |
+
"rope_theta": 1000000.0,
|
| 41 |
+
"sliding_window": 131072,
|
| 42 |
+
"tie_word_embeddings": false,
|
| 43 |
+
"torch_dtype": "bfloat16",
|
| 44 |
+
"transformers_version": "4.42.3",
|
| 45 |
+
"use_cache": true,
|
| 46 |
+
"use_sliding_window": false,
|
| 47 |
+
"vocab_size": 152064
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
[INFO|patcher.py:79] 2024-07-17 00:34:51,775 >> Using KV cache for faster generation.
|
| 52 |
+
|
| 53 |
+
[INFO|modeling_utils.py:3553] 2024-07-17 00:34:51,873 >> loading weights file D:\models\Qwen2-7B\model.safetensors.index.json
|
| 54 |
+
|
| 55 |
+
[INFO|modeling_utils.py:1531] 2024-07-17 00:34:51,874 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.
|
| 56 |
+
|
| 57 |
+
[INFO|configuration_utils.py:1000] 2024-07-17 00:34:51,876 >> Generate config GenerationConfig {
|
| 58 |
+
"bos_token_id": 151643,
|
| 59 |
+
"eos_token_id": 151643
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
[INFO|modeling_utils.py:4364] 2024-07-17 00:35:20,895 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
[INFO|modeling_utils.py:4372] 2024-07-17 00:35:20,895 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at D:\models\Qwen2-7B.
|
| 67 |
+
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
|
| 68 |
+
|
| 69 |
+
[INFO|configuration_utils.py:953] 2024-07-17 00:35:20,899 >> loading configuration file D:\models\Qwen2-7B\generation_config.json
|
| 70 |
+
|
| 71 |
+
[INFO|configuration_utils.py:1000] 2024-07-17 00:35:20,900 >> Generate config GenerationConfig {
|
| 72 |
+
"bos_token_id": 151643,
|
| 73 |
+
"eos_token_id": 151643,
|
| 74 |
+
"max_new_tokens": 2048
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
[INFO|attention.py:80] 2024-07-17 00:35:21,646 >> Using torch SDPA for faster training and inference.
|
| 79 |
+
|
| 80 |
+
[INFO|adapter.py:195] 2024-07-17 00:35:22,672 >> Merged 1 adapter(s).
|
| 81 |
+
|
| 82 |
+
[INFO|adapter.py:203] 2024-07-17 00:35:22,673 >> Loaded adapter(s): saves\Qwen2-7B\lora\train_2024-07-05-17-07-59
|
| 83 |
+
|
| 84 |
+
[INFO|loader.py:196] 2024-07-17 00:35:22,679 >> all params: 7,615,616,512
|
| 85 |
+
|
| 86 |
+
[INFO|trainer.py:3788] 2024-07-17 00:35:22,749 >>
|
| 87 |
+
***** Running Prediction *****
|
| 88 |
+
|
| 89 |
+
[INFO|trainer.py:3790] 2024-07-17 00:35:22,749 >> Num examples = 17771
|
| 90 |
+
|
| 91 |
+
[INFO|trainer.py:3793] 2024-07-17 00:35:22,750 >> Batch size = 2
|
| 92 |
+
|
| 93 |
+
[WARNING|logging.py:328] 2024-07-17 00:35:24,938 >> We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
|
| 94 |
+
|
| 95 |
+
[INFO|trainer.py:127] 2024-07-17 05:56:24,534 >> Saving prediction results to saves\Qwen2-7B\lora\eval_2024-07-17-00-34-20\generated_predictions.jsonl
|
| 96 |
+
|
eval_2024-07-17-00-34-20/trainer_log.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval_2024-07-17-00-34-20/training_args.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adapter_name_or_path: saves\Qwen2-7B\lora\train_2024-07-05-17-07-59
|
| 2 |
+
cutoff_len: 1024
|
| 3 |
+
dataset: test_data1
|
| 4 |
+
dataset_dir: data
|
| 5 |
+
do_predict: true
|
| 6 |
+
finetuning_type: lora
|
| 7 |
+
flash_attn: auto
|
| 8 |
+
max_new_tokens: 512
|
| 9 |
+
max_samples: 100000
|
| 10 |
+
model_name_or_path: D:\models\Qwen2-7B
|
| 11 |
+
output_dir: saves\Qwen2-7B\lora\eval_2024-07-17-00-34-20
|
| 12 |
+
per_device_eval_batch_size: 2
|
| 13 |
+
predict_with_generate: true
|
| 14 |
+
preprocessing_num_workers: 16
|
| 15 |
+
quantization_method: bitsandbytes
|
| 16 |
+
stage: sft
|
| 17 |
+
temperature: 0.95
|
| 18 |
+
template: default
|
| 19 |
+
top_p: 0.7
|