Pritish92 commited on
Commit
8f75784
·
verified ·
1 Parent(s): bd2d239

Upload Assignment 2 artifacts

Browse files
part1_dare_results.json CHANGED
@@ -4,56 +4,56 @@
4
  "drop_rate_p": 0.1,
5
  "density": 0.9,
6
  "checkpoint_kept": true,
7
- "rougeL": 0.4983525876093482,
8
- "meteor": 0.5409804786486961,
9
- "bleu": 49.77929949498992,
10
- "exact_match": 0.41846758349705304,
11
- "mean_text_metric": 16.939544187082657,
12
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
13
  },
14
  {
15
  "drop_rate_p": 0.3,
16
  "density": 0.7,
17
  "checkpoint_kept": true,
18
- "rougeL": 0.5006086651075169,
19
- "meteor": 0.5435733528456694,
20
- "bleu": 49.93061260818402,
21
- "exact_match": 0.4214145383104126,
22
- "mean_text_metric": 16.9915982087124,
23
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
24
  },
25
  {
26
  "drop_rate_p": 0.5,
27
  "density": 0.5,
28
  "checkpoint_kept": true,
29
- "rougeL": 0.5013032195456173,
30
- "meteor": 0.5439497705400033,
31
- "bleu": 49.9684049416312,
32
- "exact_match": 0.4223968565815324,
33
- "mean_text_metric": 17.00455264390561,
34
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
35
  },
36
  {
37
  "drop_rate_p": 0.7,
38
  "density": 0.3,
39
- "checkpoint_kept": true,
40
- "rougeL": 0.5028853392903572,
41
- "meteor": 0.5450294405327193,
42
- "bleu": 50.24399895278591,
43
- "exact_match": 0.4243614931237721,
44
- "mean_text_metric": 17.097304577536327,
45
- "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
46
  }
47
  ],
48
  "selected": {
49
- "drop_rate_p": 0.7,
50
- "density": 0.3,
51
  "checkpoint_kept": true,
52
- "rougeL": 0.5028853392903572,
53
- "meteor": 0.5450294405327193,
54
- "bleu": 50.24399895278591,
55
- "exact_match": 0.4243614931237721,
56
- "mean_text_metric": 17.097304577536327,
57
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
58
  },
59
  "best_model_path": "/root/SafeGenAI/work/models/model_sft_dare",
 
4
  "drop_rate_p": 0.1,
5
  "density": 0.9,
6
  "checkpoint_kept": true,
7
+ "rougeL": 0.4970292890659862,
8
+ "meteor": 0.5402472477605571,
9
+ "bleu": 49.617399302698296,
10
+ "exact_match": 0.4169941060903733,
11
+ "mean_text_metric": 16.88489194650828,
12
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
13
  },
14
  {
15
  "drop_rate_p": 0.3,
16
  "density": 0.7,
17
  "checkpoint_kept": true,
18
+ "rougeL": 0.49710649238976634,
19
+ "meteor": 0.5407666616360554,
20
+ "bleu": 49.78977109224581,
21
+ "exact_match": 0.4199410609037328,
22
+ "mean_text_metric": 16.942548082090543,
23
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
24
  },
25
  {
26
  "drop_rate_p": 0.5,
27
  "density": 0.5,
28
  "checkpoint_kept": true,
29
+ "rougeL": 0.49744879050858737,
30
+ "meteor": 0.5403587306686749,
31
+ "bleu": 49.815553566088504,
32
+ "exact_match": 0.41895874263261296,
33
+ "mean_text_metric": 16.95112036242192,
34
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
35
  },
36
  {
37
  "drop_rate_p": 0.7,
38
  "density": 0.3,
39
+ "checkpoint_kept": false,
40
+ "rougeL": 0.494856579197801,
41
+ "meteor": 0.5383071027351187,
42
+ "bleu": 49.157171598741805,
43
+ "exact_match": 0.4155206286836935,
44
+ "mean_text_metric": 16.730111760224908,
45
+ "model_path": "temporary merge output removed after evaluation"
46
  }
47
  ],
48
  "selected": {
49
+ "drop_rate_p": 0.5,
50
+ "density": 0.5,
51
  "checkpoint_kept": true,
52
+ "rougeL": 0.49744879050858737,
53
+ "meteor": 0.5403587306686749,
54
+ "bleu": 49.815553566088504,
55
+ "exact_match": 0.41895874263261296,
56
+ "mean_text_metric": 16.95112036242192,
57
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
58
  },
59
  "best_model_path": "/root/SafeGenAI/work/models/model_sft_dare",
part1_sft_metadata.json CHANGED
@@ -25,7 +25,7 @@
25
  "up_proj",
26
  "down_proj"
27
  ],
28
- "train_runtime_seconds": 952.3738,
29
- "train_samples_per_second": 19.234,
30
  "adapter_dir": "/root/SafeGenAI/work/models/model_sft_lora_adapter"
31
  }
 
25
  "up_proj",
26
  "down_proj"
27
  ],
28
+ "train_runtime_seconds": 999.5917,
29
+ "train_samples_per_second": 18.325,
30
  "adapter_dir": "/root/SafeGenAI/work/models/model_sft_lora_adapter"
31
  }
part2_harmful_train_metadata.json CHANGED
@@ -22,5 +22,5 @@
22
  "down_proj"
23
  ],
24
  "adapter_dir": "/root/SafeGenAI/work/models/model_harmful_lora_adapter",
25
- "train_runtime_seconds": 113.9115
26
  }
 
22
  "down_proj"
23
  ],
24
  "adapter_dir": "/root/SafeGenAI/work/models/model_harmful_lora_adapter",
25
+ "train_runtime_seconds": 116.0994
26
  }
part3_lambda_sweep.json CHANGED
@@ -41,10 +41,10 @@
41
  },
42
  {
43
  "lambda": 1.0,
44
- "unsafe_score": 0.975,
45
- "harmful_count": 39,
46
  "unresolved_count": 0,
47
- "not_harmful_count": 1
48
  },
49
  {
50
  "lambda": 1.5,
 
41
  },
42
  {
43
  "lambda": 1.0,
44
+ "unsafe_score": 1.0,
45
+ "harmful_count": 40,
46
  "unresolved_count": 0,
47
+ "not_harmful_count": 0
48
  },
49
  {
50
  "lambda": 1.5,
part4_comparison_summary.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "lowest_unsafe_score_variant": "model_sft_lora",
3
- "lowest_unsafe_score": 0.9509090909090909,
4
  "best_mean_utility_variant": "model_sft_dare",
5
- "best_mean_utility": 17.287378549871548,
6
  "selected_lambdas": {
7
  "model_sft_lora": {
8
  "lambda": 1.0,
@@ -20,14 +20,14 @@
20
  }
21
  },
22
  "selected_dare": {
23
- "drop_rate_p": 0.7,
24
- "density": 0.3,
25
  "checkpoint_kept": true,
26
- "rougeL": 0.5028853392903572,
27
- "meteor": 0.5450294405327193,
28
- "bleu": 50.24399895278591,
29
- "exact_match": 0.4243614931237721,
30
- "mean_text_metric": 17.097304577536327,
31
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
32
  }
33
  }
 
1
  {
2
+ "lowest_unsafe_score_variant": "model_sft_dare_fv",
3
+ "lowest_unsafe_score": 0.9490909090909091,
4
  "best_mean_utility_variant": "model_sft_dare",
5
+ "best_mean_utility": 17.160792085481667,
6
  "selected_lambdas": {
7
  "model_sft_lora": {
8
  "lambda": 1.0,
 
20
  }
21
  },
22
  "selected_dare": {
23
+ "drop_rate_p": 0.5,
24
+ "density": 0.5,
25
  "checkpoint_kept": true,
26
+ "rougeL": 0.49744879050858737,
27
+ "meteor": 0.5403587306686749,
28
+ "bleu": 49.815553566088504,
29
+ "exact_match": 0.41895874263261296,
30
+ "mean_text_metric": 16.95112036242192,
31
  "model_path": "/root/SafeGenAI/work/models/model_sft_dare"
32
  }
33
  }
part4_safety_results.json CHANGED
@@ -19,11 +19,11 @@
19
  },
20
  {
21
  "variant": "model_sft_dare",
22
- "harmful_count": 533,
23
  "unresolved_count": 0,
24
- "not_harmful_count": 17,
25
  "total": 550,
26
- "unsafe_score": 0.9690909090909091
27
  },
28
  {
29
  "variant": "model_sft_resta",
@@ -35,11 +35,11 @@
35
  },
36
  {
37
  "variant": "model_sft_dare_resta",
38
- "harmful_count": 540,
39
  "unresolved_count": 1,
40
- "not_harmful_count": 9,
41
  "total": 550,
42
- "unsafe_score": 0.9818181818181818
43
  },
44
  {
45
  "variant": "model_sft_lora_fv",
@@ -51,11 +51,11 @@
51
  },
52
  {
53
  "variant": "model_sft_dare_fv",
54
- "harmful_count": 529,
55
  "unresolved_count": 0,
56
- "not_harmful_count": 21,
57
  "total": 550,
58
- "unsafe_score": 0.9618181818181818
59
  }
60
  ]
61
  }
 
19
  },
20
  {
21
  "variant": "model_sft_dare",
22
+ "harmful_count": 530,
23
  "unresolved_count": 0,
24
+ "not_harmful_count": 20,
25
  "total": 550,
26
+ "unsafe_score": 0.9636363636363636
27
  },
28
  {
29
  "variant": "model_sft_resta",
 
35
  },
36
  {
37
  "variant": "model_sft_dare_resta",
38
+ "harmful_count": 538,
39
  "unresolved_count": 1,
40
+ "not_harmful_count": 11,
41
  "total": 550,
42
+ "unsafe_score": 0.9781818181818182
43
  },
44
  {
45
  "variant": "model_sft_lora_fv",
 
51
  },
52
  {
53
  "variant": "model_sft_dare_fv",
54
+ "harmful_count": 522,
55
  "unresolved_count": 0,
56
+ "not_harmful_count": 28,
57
  "total": 550,
58
+ "unsafe_score": 0.9490909090909091
59
  }
60
  ]
61
  }
part4_utility_results.json CHANGED
@@ -14,9 +14,9 @@
14
  },
15
  {
16
  "variant": "model_sft_dare",
17
- "rougeL": 0.4977649188609028,
18
- "meteor": 0.5424214597697455,
19
- "bleu": 50.82194927098399
20
  },
21
  {
22
  "variant": "model_sft_resta",
@@ -26,9 +26,9 @@
26
  },
27
  {
28
  "variant": "model_sft_dare_resta",
29
- "rougeL": 0.4972648329903995,
30
- "meteor": 0.5416494119882149,
31
- "bleu": 50.57689575731974
32
  },
33
  {
34
  "variant": "model_sft_lora_fv",
@@ -38,9 +38,9 @@
38
  },
39
  {
40
  "variant": "model_sft_dare_fv",
41
- "rougeL": 0.4929090446721091,
42
- "meteor": 0.5377806504811916,
43
- "bleu": 50.317973552453154
44
  }
45
  ]
46
  }
 
14
  },
15
  {
16
  "variant": "model_sft_dare",
17
+ "rougeL": 0.4947146079909398,
18
+ "meteor": 0.5392367923348791,
19
+ "bleu": 50.44842485611918
20
  },
21
  {
22
  "variant": "model_sft_resta",
 
26
  },
27
  {
28
  "variant": "model_sft_dare_resta",
29
+ "rougeL": 0.4972108904469249,
30
+ "meteor": 0.5415584297493634,
31
+ "bleu": 50.26842101648165
32
  },
33
  {
34
  "variant": "model_sft_lora_fv",
 
38
  },
39
  {
40
  "variant": "model_sft_dare_fv",
41
+ "rougeL": 0.4856199136601769,
42
+ "meteor": 0.5310546393218359,
43
+ "bleu": 49.52422131686529
44
  }
45
  ]
46
  }