Upload 7 files

Browse files

Files changed (7) hide show

eval_2024-07-17-00-34-20/all_results.json +9 -0
eval_2024-07-17-00-34-20/generated_predictions.jsonl +0 -0
eval_2024-07-17-00-34-20/llamaboard_config.yaml +21 -0
eval_2024-07-17-00-34-20/predict_results.json +9 -0
eval_2024-07-17-00-34-20/running_log.txt +96 -0
eval_2024-07-17-00-34-20/trainer_log.jsonl +0 -0
eval_2024-07-17-00-34-20/training_args.yaml +19 -0

eval_2024-07-17-00-34-20/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "predict_bleu-4": 78.874164847223,
+    "predict_rouge-1": 95.77926588261774,
+    "predict_rouge-2": 31.06746947273648,
+    "predict_rouge-l": 95.77926588261774,
+    "predict_runtime": 19261.7879,
+    "predict_samples_per_second": 0.923,
+    "predict_steps_per_second": 0.461
+}

eval_2024-07-17-00-34-20/generated_predictions.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

eval_2024-07-17-00-34-20/llamaboard_config.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+eval.batch_size: 2
+eval.cutoff_len: 1024
+eval.dataset:
+- test_data1
+eval.dataset_dir: data
+eval.max_new_tokens: 512
+eval.max_samples: '100000'
+eval.output_dir: eval_2024-07-17-00-34-20
+eval.predict: true
+eval.temperature: 0.95
+eval.top_p: 0.7
+top.booster: auto
+top.checkpoint_path:
+- train_2024-07-05-17-07-59
+top.finetuning_type: lora
+top.model_name: Qwen2-7B
+top.quantization_bit: none
+top.quantization_method: bitsandbytes
+top.rope_scaling: none
+top.template: default
+top.visual_inputs: false

eval_2024-07-17-00-34-20/predict_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "predict_bleu-4": 78.874164847223,
+    "predict_rouge-1": 95.77926588261774,
+    "predict_rouge-2": 31.06746947273648,
+    "predict_rouge-l": 95.77926588261774,
+    "predict_runtime": 19261.7879,
+    "predict_samples_per_second": 0.923,
+    "predict_steps_per_second": 0.461
+}

eval_2024-07-17-00-34-20/running_log.txt ADDED Viewed

	@@ -0,0 +1,96 @@

+[INFO|parser.py:317] 2024-07-17 00:34:50,446 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
+[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,482 >> loading file vocab.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,482 >> loading file merges.txt
+[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,484 >> loading file tokenizer.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,484 >> loading file added_tokens.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,485 >> loading file special_tokens_map.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-17 00:34:50,485 >> loading file tokenizer_config.json
+[WARNING|logging.py:313] 2024-07-17 00:34:50,771 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+[INFO|loader.py:50] 2024-07-17 00:34:50,772 >> Loading dataset test_data1.json...
+[INFO|configuration_utils.py:731] 2024-07-17 00:34:51,772 >> loading configuration file D:\models\Qwen2-7B\config.json
+[INFO|configuration_utils.py:800] 2024-07-17 00:34:51,774 >> Model config Qwen2Config {
+  "_name_or_path": "D:\\models\\Qwen2-7B",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 131072,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.42.3",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}
+[INFO|patcher.py:79] 2024-07-17 00:34:51,775 >> Using KV cache for faster generation.
+[INFO|modeling_utils.py:3553] 2024-07-17 00:34:51,873 >> loading weights file D:\models\Qwen2-7B\model.safetensors.index.json
+[INFO|modeling_utils.py:1531] 2024-07-17 00:34:51,874 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.
+[INFO|configuration_utils.py:1000] 2024-07-17 00:34:51,876 >> Generate config GenerationConfig {
+  "bos_token_id": 151643,
+  "eos_token_id": 151643
+}
+[INFO|modeling_utils.py:4364] 2024-07-17 00:35:20,895 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.
+[INFO|modeling_utils.py:4372] 2024-07-17 00:35:20,895 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at D:\models\Qwen2-7B.
+If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
+[INFO|configuration_utils.py:953] 2024-07-17 00:35:20,899 >> loading configuration file D:\models\Qwen2-7B\generation_config.json
+[INFO|configuration_utils.py:1000] 2024-07-17 00:35:20,900 >> Generate config GenerationConfig {
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048
+}
+[INFO|attention.py:80] 2024-07-17 00:35:21,646 >> Using torch SDPA for faster training and inference.
+[INFO|adapter.py:195] 2024-07-17 00:35:22,672 >> Merged 1 adapter(s).
+[INFO|adapter.py:203] 2024-07-17 00:35:22,673 >> Loaded adapter(s): saves\Qwen2-7B\lora\train_2024-07-05-17-07-59
+[INFO|loader.py:196] 2024-07-17 00:35:22,679 >> all params: 7,615,616,512
+[INFO|trainer.py:3788] 2024-07-17 00:35:22,749 >>
+***** Running Prediction *****
+[INFO|trainer.py:3790] 2024-07-17 00:35:22,749 >>   Num examples = 17771
+[INFO|trainer.py:3793] 2024-07-17 00:35:22,750 >>   Batch size = 2
+[WARNING|logging.py:328] 2024-07-17 00:35:24,938 >> We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
+[INFO|trainer.py:127] 2024-07-17 05:56:24,534 >> Saving prediction results to saves\Qwen2-7B\lora\eval_2024-07-17-00-34-20\generated_predictions.jsonl

eval_2024-07-17-00-34-20/trainer_log.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

eval_2024-07-17-00-34-20/training_args.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+adapter_name_or_path: saves\Qwen2-7B\lora\train_2024-07-05-17-07-59
+cutoff_len: 1024
+dataset: test_data1
+dataset_dir: data
+do_predict: true
+finetuning_type: lora
+flash_attn: auto
+max_new_tokens: 512
+max_samples: 100000
+model_name_or_path: D:\models\Qwen2-7B
+output_dir: saves\Qwen2-7B\lora\eval_2024-07-17-00-34-20
+per_device_eval_batch_size: 2
+predict_with_generate: true
+preprocessing_num_workers: 16
+quantization_method: bitsandbytes
+stage: sft
+temperature: 0.95
+template: default
+top_p: 0.7