Upload 7 files

Browse files

Files changed (7) hide show

eval_2024-07-21-20-18-07/all_results.json +9 -0
eval_2024-07-21-20-18-07/generated_predictions.jsonl +0 -0
eval_2024-07-21-20-18-07/llamaboard_config.yaml +21 -0
eval_2024-07-21-20-18-07/predict_results.json +9 -0
eval_2024-07-21-20-18-07/running_log.txt +101 -0
eval_2024-07-21-20-18-07/trainer_log.jsonl +0 -0
eval_2024-07-21-20-18-07/training_args.yaml +20 -0

eval_2024-07-21-20-18-07/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "predict_bleu-4": 79.14533431433236,
+    "predict_rouge-1": 96.22080920600978,
+    "predict_rouge-2": 31.089978054133137,
+    "predict_rouge-l": 96.22080920600978,
+    "predict_runtime": 13302.1594,
+    "predict_samples_per_second": 1.336,
+    "predict_steps_per_second": 0.668
+}

eval_2024-07-21-20-18-07/generated_predictions.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

eval_2024-07-21-20-18-07/llamaboard_config.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+eval.batch_size: 2
+eval.cutoff_len: 1024
+eval.dataset:
+- test_data1
+eval.dataset_dir: data
+eval.max_new_tokens: 512
+eval.max_samples: '100000'
+eval.output_dir: eval_2024-07-21-20-18-07
+eval.predict: true
+eval.temperature: 0.95
+eval.top_p: 0.7
+top.booster: auto
+top.checkpoint_path:
+- train_2024-07-21-20-18-07
+top.finetuning_type: lora
+top.model_name: Gemma-2-9B
+top.quantization_bit: '4'
+top.quantization_method: bitsandbytes
+top.rope_scaling: none
+top.template: default
+top.visual_inputs: false

eval_2024-07-21-20-18-07/predict_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "predict_bleu-4": 79.14533431433236,
+    "predict_rouge-1": 96.22080920600978,
+    "predict_rouge-2": 31.089978054133137,
+    "predict_rouge-l": 96.22080920600978,
+    "predict_runtime": 13302.1594,
+    "predict_samples_per_second": 1.336,
+    "predict_steps_per_second": 0.668
+}

eval_2024-07-21-20-18-07/running_log.txt ADDED Viewed

	@@ -0,0 +1,101 @@

+[WARNING|parser.py:255] 2024-07-31 20:33:17,976 >> Evaluating model in 4/8-bit mode may cause lower scores.
+[INFO|parser.py:317] 2024-07-31 20:33:17,978 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
+[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,076 >> loading file tokenizer.model
+[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,076 >> loading file tokenizer.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,077 >> loading file added_tokens.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,078 >> loading file special_tokens_map.json
+[INFO|tokenization_utils_base.py:2159] 2024-07-31 20:33:18,078 >> loading file tokenizer_config.json
+[INFO|loader.py:50] 2024-07-31 20:33:18,960 >> Loading dataset test_data1.json...
+[INFO|configuration_utils.py:731] 2024-07-31 20:33:42,848 >> loading configuration file D:\models\gemma-2-9b\config.json
+[INFO|configuration_utils.py:800] 2024-07-31 20:33:42,851 >> Model config Gemma2Config {
+  "_name_or_path": "D:\\models\\gemma-2-9b",
+  "architectures": [
+    "Gemma2ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": 50.0,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": 30.0,
+  "head_dim": 256,
+  "hidden_act": "gelu_pytorch_tanh",
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "model_type": "gemma2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 42,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 224,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "sliding_window_size": 4096,
+  "torch_dtype": "float32",
+  "transformers_version": "4.42.3",
+  "use_cache": true,
+  "vocab_size": 256000
+}
+[INFO|quantization.py:183] 2024-07-31 20:33:42,854 >> Quantizing model to 4 bit with bitsandbytes.
+[INFO|patcher.py:79] 2024-07-31 20:33:42,854 >> Using KV cache for faster generation.
+[INFO|modeling_utils.py:3553] 2024-07-31 20:33:42,951 >> loading weights file D:\models\gemma-2-9b\model.safetensors.index.json
+[INFO|modeling_utils.py:1531] 2024-07-31 20:33:42,960 >> Instantiating Gemma2ForCausalLM model under default dtype torch.float16.
+[INFO|configuration_utils.py:1000] 2024-07-31 20:33:42,961 >> Generate config GenerationConfig {
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "pad_token_id": 0
+}
+[INFO|modeling_utils.py:4364] 2024-07-31 20:34:18,478 >> All model checkpoint weights were used when initializing Gemma2ForCausalLM.
+[INFO|modeling_utils.py:4372] 2024-07-31 20:34:18,478 >> All the weights of Gemma2ForCausalLM were initialized from the model checkpoint at D:\models\gemma-2-9b.
+If your task is similar to the task the model of the checkpoint was trained on, you can already use Gemma2ForCausalLM for predictions without further training.
+[INFO|configuration_utils.py:953] 2024-07-31 20:34:18,490 >> loading configuration file D:\models\gemma-2-9b\generation_config.json
+[INFO|configuration_utils.py:1000] 2024-07-31 20:34:18,491 >> Generate config GenerationConfig {
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "pad_token_id": 0
+}
+[INFO|attention.py:80] 2024-07-31 20:34:18,806 >> Using torch SDPA for faster training and inference.
+[INFO|adapter.py:203] 2024-07-31 20:34:19,915 >> Loaded adapter(s): saves\Gemma-2-9B\lora\train_2024-07-21-20-18-07
+[INFO|loader.py:196] 2024-07-31 20:34:19,959 >> all params: 9,268,715,008
+[INFO|trainer.py:3788] 2024-07-31 20:34:20,030 >>
+***** Running Prediction *****
+[INFO|trainer.py:3790] 2024-07-31 20:34:20,030 >>   Num examples = 17771
+[INFO|trainer.py:3793] 2024-07-31 20:34:20,031 >>   Batch size = 2
+[INFO|trainer.py:127] 2024-08-01 00:16:02,179 >> Saving prediction results to saves\Gemma-2-9B\lora\eval_2024-07-21-20-18-07\generated_predictions.jsonl

eval_2024-07-21-20-18-07/trainer_log.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

eval_2024-07-21-20-18-07/training_args.yaml ADDED Viewed

	@@ -0,0 +1,20 @@

+adapter_name_or_path: saves\Gemma-2-9B\lora\train_2024-07-21-20-18-07
+cutoff_len: 1024
+dataset: test_data1
+dataset_dir: data
+do_predict: true
+finetuning_type: lora
+flash_attn: auto
+max_new_tokens: 512
+max_samples: 100000
+model_name_or_path: D:\models\gemma-2-9b
+output_dir: saves\Gemma-2-9B\lora\eval_2024-07-21-20-18-07
+per_device_eval_batch_size: 2
+predict_with_generate: true
+preprocessing_num_workers: 16
+quantization_bit: 4
+quantization_method: bitsandbytes
+stage: sft
+temperature: 0.95
+template: default
+top_p: 0.7