Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- gpt2/gpt2-base/dskd/answers_dialogsum_10.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dialogsum_20.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dialogsum_30.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dialogsum_40.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dialogsum_50.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dolly_10.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dolly_20.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dolly_30.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dolly_40.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_dolly_50.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_self-inst_10.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_self-inst_20.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_self-inst_30.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_self-inst_40.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_self-inst_50.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_sinst_11__10.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_sinst_11__20.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_sinst_11__30.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_sinst_11__40.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_sinst_11__50.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_vicuna_10.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_vicuna_20.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_vicuna_30.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_vicuna_40.jsonl +0 -0
- gpt2/gpt2-base/dskd/answers_vicuna_50.jsonl +0 -0
- gpt2/gpt2-base/dskd/args.json +0 -1
- gpt2/gpt2-base/dskd/log.txt +0 -100
- gpt2/gpt2-base/dskd/rougeL_results.jsonl +0 -25
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_10.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_20.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_30.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_40.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_50.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_10.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_20.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_30.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_40.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_50.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_10.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_20.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_30.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_40.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_50.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__10.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__20.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__30.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__40.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__50.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_vicuna_10.jsonl +0 -0
- gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_vicuna_20.jsonl +0 -0
gpt2/gpt2-base/dskd/answers_dialogsum_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dialogsum_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dialogsum_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dialogsum_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dialogsum_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dolly_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dolly_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dolly_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dolly_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_dolly_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_self-inst_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_self-inst_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_self-inst_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_self-inst_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_self-inst_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_sinst_11__10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_sinst_11__20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_sinst_11__30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_sinst_11__40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_sinst_11__50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_vicuna_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_vicuna_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_vicuna_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_vicuna_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/answers_vicuna_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dskd/args.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"model_path": "/workspace/DSKD/outputs/gpt2/gpt2-base/dskd/MCW_KD_GPT2_DSKD", "ckpt_name": null, "model_type": "outputs", "teacher_model_type": null, "n_gpu": 1, "n_nodes": 1, "teacher_model_path": null, "teacher_model_fp16": false, "model_parallel": false, "model_parallel_size": null, "no_value": false, "dropout_path_rate": null, "fp32": false, "model_dtype": "fp16", "task": "eval_main", "do_train": false, "do_valid": false, "do_eval": true, "base_path": "/workspace/DSKD", "load": null, "save_dir": "/workspace/DSKD/outputs/gpt2/gpt2-base/dskd", "log_interval": 10, "save_interval": 1000, "eval_interval": 1000, "local_rank": 0, "save_additional_suffix": "", "save_rollout": false, "eb_sample_times": 3, "keep_best_n_checkpoints": 3, "criterion": "cross_entropy", "eval_tqdm": false, "report_logits": false, "only_save_projector": false, "debug": false, "data_dir": "/workspace/DSKD/data/dialogsum", "processed_data_dir": null, "force_process": false, "force_process_demo": false, "data_process_workers": -1, "train_num": -1, "train_ratio": 1, "dev_num": -1, "dev_ratio": 1, "gen_num": -1, "data_names": "dialogsum", "prompt_type": null, "num_workers": 0, "max_prompt_length": 256, "min_prompt_length": 128, "json_data": true, "bin_data": false, "txt_data": false, "prompt_data_dir": null, "pretrain_data_dir": null, "eval_ppl": false, "eval_rw": false, "eval_gen": false, "only_prompt": false, "batch_size": 32, "eval_batch_size": 32, "clip_grad": 1.0, "total_iters": null, "train_iters_per_epoch": -1, "max_length": 512, "seed": 50, "seed_order": 42, "seed_data": 42, "seed_ppo": 42, "seed_lm": 7, "num_epochs": null, "training_epochs": 10000, "gradient_accumulation_steps": 1, "gradient_checkpointing": false, "attn_dtype": null, "lr": null, "lr_min": 1e-07, "weight_decay": 0.01, "loss_scale": 65536, "kd_rate": 0.5, "kd_temperature": 1.0, "kd_objective": "forward_kl", "teacher_temperature": 1.0, "label_smoothing": 0.0, "adaptive_kl_alpha": 0.5, "skew_lambda": 0.1, "warmup_iters": 0, "lr_decay_iters": null, "lr_decay_style": "noam", "scheduler_name": "constant_trm", "top_k": 0, "top_p": 1.0, "do_sample": true, "no_repeat_ngram_size": 6, "repetition_penalty": null, "num_beams": 1, "temperature": 1.0, "eval_gen_repeat_times": 3, "peft": null, "peft_lora_r": 16, "peft_lora_alpha": 64, "peft_lora_dropout": 0.1, "peft_name": null, "peft_path": null, "teacher_peft_name": null, "teacher_peft_path": null, "deepspeed": true, "deepspeed_config": "/workspace/DSKD/configs/deepspeed/ds_config_bf16.json", "deepscale": false, "deepscale_config": null, "projector_config_path": null, "projector_path": null, "projector_lr": 0.001, "pretrained_projector": null, "pretrained_projector_lr": 0.001, "vocab_alignment_path": null, "teacher_to_student_token_mapping": null, "teacher_to_student_id_mapping": null, "student_to_teacher_token_mapping": null, "student_to_teacher_id_mapping": null, "rank": 0, "world_size": 1}
|
|
|
|
|
|
gpt2/gpt2-base/dskd/log.txt
DELETED
|
@@ -1,100 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
============================== EXP at 2025-12-14 09:00:35 ==============================
|
| 4 |
-
test | name: dolly | {'exact_match': 2.4, 'rougeL': 24.5132} | lm_loss 6.9518 | avg. gen lenth: 64.03 | seed 10
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
============================== EXP at 2025-12-14 09:02:04 ==============================
|
| 8 |
-
test | name: dolly | {'exact_match': 2.0, 'rougeL': 23.9859} | lm_loss 6.9518 | avg. gen lenth: 64.87 | seed 20
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
============================== EXP at 2025-12-14 09:03:37 ==============================
|
| 12 |
-
test | name: dolly | {'exact_match': 2.0, 'rougeL': 24.5616} | lm_loss 6.9518 | avg. gen lenth: 63.996 | seed 30
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
============================== EXP at 2025-12-14 09:05:11 ==============================
|
| 16 |
-
test | name: dolly | {'exact_match': 2.6, 'rougeL': 24.6922} | lm_loss 6.9518 | avg. gen lenth: 62.03 | seed 40
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
============================== EXP at 2025-12-14 09:06:43 ==============================
|
| 20 |
-
test | name: dolly | {'exact_match': 2.2, 'rougeL': 24.1888} | lm_loss 6.9518 | avg. gen lenth: 64.408 | seed 50
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
============================== EXP at 2025-12-14 09:08:11 ==============================
|
| 24 |
-
test | name: self-inst | {'exact_match': 0.0, 'rougeL': 10.2346} | lm_loss 8.8444 | avg. gen lenth: 69.30578512396694 | seed 10
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
============================== EXP at 2025-12-14 09:09:10 ==============================
|
| 28 |
-
test | name: self-inst | {'exact_match': 0.0, 'rougeL': 11.09} | lm_loss 8.8444 | avg. gen lenth: 70.04132231404958 | seed 20
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
============================== EXP at 2025-12-14 09:10:13 ==============================
|
| 32 |
-
test | name: self-inst | {'exact_match': 0.0, 'rougeL': 10.5647} | lm_loss 8.8444 | avg. gen lenth: 70.7892561983471 | seed 30
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
============================== EXP at 2025-12-14 09:11:07 ==============================
|
| 36 |
-
test | name: self-inst | {'exact_match': 0.0, 'rougeL': 10.4475} | lm_loss 8.8444 | avg. gen lenth: 68.52066115702479 | seed 40
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
============================== EXP at 2025-12-14 09:11:58 ==============================
|
| 40 |
-
test | name: self-inst | {'exact_match': 0.0, 'rougeL': 10.4891} | lm_loss 8.8444 | avg. gen lenth: 67.91322314049587 | seed 50
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
============================== EXP at 2025-12-14 09:12:56 ==============================
|
| 44 |
-
test | name: vicuna | {'exact_match': 0.0, 'rougeL': 15.779} | lm_loss 7.0211 | avg. gen lenth: 117.8375 | seed 10
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
============================== EXP at 2025-12-14 09:13:34 ==============================
|
| 48 |
-
test | name: vicuna | {'exact_match': 0.0, 'rougeL': 15.5566} | lm_loss 7.0211 | avg. gen lenth: 112.9625 | seed 20
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
============================== EXP at 2025-12-14 09:14:12 ==============================
|
| 52 |
-
test | name: vicuna | {'exact_match': 0.0, 'rougeL': 15.6034} | lm_loss 7.0211 | avg. gen lenth: 115.35 | seed 30
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
============================== EXP at 2025-12-14 09:14:44 ==============================
|
| 56 |
-
test | name: vicuna | {'exact_match': 0.0, 'rougeL': 15.2329} | lm_loss 7.0211 | avg. gen lenth: 113.25 | seed 40
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
============================== EXP at 2025-12-14 09:15:17 ==============================
|
| 60 |
-
test | name: vicuna | {'exact_match': 0.0, 'rougeL': 15.5976} | lm_loss 7.0211 | avg. gen lenth: 106.1875 | seed 50
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
============================== EXP at 2025-12-14 09:15:52 ==============================
|
| 64 |
-
test | name: sinst/11_ | {'exact_match': 0.0, 'rougeL': 17.2637} | lm_loss 8.8454 | avg. gen lenth: 40.77508854781582 | seed 10
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
============================== EXP at 2025-12-14 09:19:12 ==============================
|
| 68 |
-
test | name: sinst/11_ | {'exact_match': 0.0, 'rougeL': 16.7415} | lm_loss 8.8454 | avg. gen lenth: 40.925619834710744 | seed 20
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
============================== EXP at 2025-12-14 09:22:36 ==============================
|
| 72 |
-
test | name: sinst/11_ | {'exact_match': 0.0, 'rougeL': 17.5263} | lm_loss 8.8454 | avg. gen lenth: 40.84238488783944 | seed 30
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
============================== EXP at 2025-12-14 09:25:57 ==============================
|
| 76 |
-
test | name: sinst/11_ | {'exact_match': 0.0, 'rougeL': 17.6793} | lm_loss 8.8454 | avg. gen lenth: 41.38724911452184 | seed 40
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
============================== EXP at 2025-12-14 09:29:16 ==============================
|
| 80 |
-
test | name: sinst/11_ | {'exact_match': 0.0, 'rougeL': 16.6866} | lm_loss 8.8454 | avg. gen lenth: 41.52951593860685 | seed 50
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
============================== EXP at 2025-12-14 09:32:41 ==============================
|
| 84 |
-
test | name: dialogsum | {'exact_match': 0.0, 'rougeL': 10.1323} | lm_loss nan | avg. gen lenth: 50.716 | seed 10
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
============================== EXP at 2025-12-14 09:37:10 ==============================
|
| 88 |
-
test | name: dialogsum | {'exact_match': 0.0, 'rougeL': 10.2727} | lm_loss nan | avg. gen lenth: 49.340666666666664 | seed 20
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
============================== EXP at 2025-12-14 09:41:12 ==============================
|
| 92 |
-
test | name: dialogsum | {'exact_match': 0.0, 'rougeL': 10.0085} | lm_loss nan | avg. gen lenth: 52.757333333333335 | seed 30
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
============================== EXP at 2025-12-14 09:45:26 ==============================
|
| 96 |
-
test | name: dialogsum | {'exact_match': 0.0, 'rougeL': 10.3976} | lm_loss nan | avg. gen lenth: 49.89066666666667 | seed 40
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
============================== EXP at 2025-12-14 09:49:43 ==============================
|
| 100 |
-
test | name: dialogsum | {'exact_match': 0.0, 'rougeL': 10.1758} | lm_loss nan | avg. gen lenth: 52.028666666666666 | seed 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gpt2/gpt2-base/dskd/rougeL_results.jsonl
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
{"dataname": "dolly", "seed": 10, "rougeL": 24.5132}
|
| 2 |
-
{"dataname": "dolly", "seed": 20, "rougeL": 23.9859}
|
| 3 |
-
{"dataname": "dolly", "seed": 30, "rougeL": 24.5616}
|
| 4 |
-
{"dataname": "dolly", "seed": 40, "rougeL": 24.6922}
|
| 5 |
-
{"dataname": "dolly", "seed": 50, "rougeL": 24.1888}
|
| 6 |
-
{"dataname": "self-inst", "seed": 10, "rougeL": 10.2346}
|
| 7 |
-
{"dataname": "self-inst", "seed": 20, "rougeL": 11.09}
|
| 8 |
-
{"dataname": "self-inst", "seed": 30, "rougeL": 10.5647}
|
| 9 |
-
{"dataname": "self-inst", "seed": 40, "rougeL": 10.4475}
|
| 10 |
-
{"dataname": "self-inst", "seed": 50, "rougeL": 10.4891}
|
| 11 |
-
{"dataname": "vicuna", "seed": 10, "rougeL": 15.779}
|
| 12 |
-
{"dataname": "vicuna", "seed": 20, "rougeL": 15.5566}
|
| 13 |
-
{"dataname": "vicuna", "seed": 30, "rougeL": 15.6034}
|
| 14 |
-
{"dataname": "vicuna", "seed": 40, "rougeL": 15.2329}
|
| 15 |
-
{"dataname": "vicuna", "seed": 50, "rougeL": 15.5976}
|
| 16 |
-
{"dataname": "sinst_11_", "seed": 10, "rougeL": 17.2637}
|
| 17 |
-
{"dataname": "sinst_11_", "seed": 20, "rougeL": 16.7415}
|
| 18 |
-
{"dataname": "sinst_11_", "seed": 30, "rougeL": 17.5263}
|
| 19 |
-
{"dataname": "sinst_11_", "seed": 40, "rougeL": 17.6793}
|
| 20 |
-
{"dataname": "sinst_11_", "seed": 50, "rougeL": 16.6866}
|
| 21 |
-
{"dataname": "dialogsum", "seed": 10, "rougeL": 10.1323}
|
| 22 |
-
{"dataname": "dialogsum", "seed": 20, "rougeL": 10.2727}
|
| 23 |
-
{"dataname": "dialogsum", "seed": 30, "rougeL": 10.0085}
|
| 24 |
-
{"dataname": "dialogsum", "seed": 40, "rougeL": 10.3976}
|
| 25 |
-
{"dataname": "dialogsum", "seed": 50, "rougeL": 10.1758}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dialogsum_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_dolly_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_self-inst_50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__30.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__40.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_sinst_11__50.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_vicuna_10.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt2/gpt2-base/dual_space_kd_with_cma/criterion=dual_space_kd_with_cma__forward_kl-bf16__teacher=Qwen1.5-1.8B__kd^rate=0.5__kd^temp=2.0__epoch=20__bsz=4x2x1=8__lr=0.0005__proj^lr=0.001/answers_vicuna_20.jsonl
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|