Upload Assignment 2 artifacts
Browse files- part1_dare_results.json +29 -29
- part1_sft_metadata.json +2 -2
- part2_harmful_train_metadata.json +1 -1
- part3_lambda_sweep.json +3 -3
- part4_comparison_summary.json +10 -10
- part4_safety_results.json +9 -9
- part4_utility_results.json +9 -9
part1_dare_results.json
CHANGED
|
@@ -4,56 +4,56 @@
|
|
| 4 |
"drop_rate_p": 0.1,
|
| 5 |
"density": 0.9,
|
| 6 |
"checkpoint_kept": true,
|
| 7 |
-
"rougeL": 0.
|
| 8 |
-
"meteor": 0.
|
| 9 |
-
"bleu": 49.
|
| 10 |
-
"exact_match": 0.
|
| 11 |
-
"mean_text_metric": 16.
|
| 12 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"drop_rate_p": 0.3,
|
| 16 |
"density": 0.7,
|
| 17 |
"checkpoint_kept": true,
|
| 18 |
-
"rougeL": 0.
|
| 19 |
-
"meteor": 0.
|
| 20 |
-
"bleu": 49.
|
| 21 |
-
"exact_match": 0.
|
| 22 |
-
"mean_text_metric": 16.
|
| 23 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"drop_rate_p": 0.5,
|
| 27 |
"density": 0.5,
|
| 28 |
"checkpoint_kept": true,
|
| 29 |
-
"rougeL": 0.
|
| 30 |
-
"meteor": 0.
|
| 31 |
-
"bleu": 49.
|
| 32 |
-
"exact_match": 0.
|
| 33 |
-
"mean_text_metric":
|
| 34 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"drop_rate_p": 0.7,
|
| 38 |
"density": 0.3,
|
| 39 |
-
"checkpoint_kept":
|
| 40 |
-
"rougeL": 0.
|
| 41 |
-
"meteor": 0.
|
| 42 |
-
"bleu":
|
| 43 |
-
"exact_match": 0.
|
| 44 |
-
"mean_text_metric":
|
| 45 |
-
"model_path": "
|
| 46 |
}
|
| 47 |
],
|
| 48 |
"selected": {
|
| 49 |
-
"drop_rate_p": 0.
|
| 50 |
-
"density": 0.
|
| 51 |
"checkpoint_kept": true,
|
| 52 |
-
"rougeL": 0.
|
| 53 |
-
"meteor": 0.
|
| 54 |
-
"bleu":
|
| 55 |
-
"exact_match": 0.
|
| 56 |
-
"mean_text_metric":
|
| 57 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 58 |
},
|
| 59 |
"best_model_path": "/root/SafeGenAI/work/models/model_sft_dare",
|
|
|
|
| 4 |
"drop_rate_p": 0.1,
|
| 5 |
"density": 0.9,
|
| 6 |
"checkpoint_kept": true,
|
| 7 |
+
"rougeL": 0.4970292890659862,
|
| 8 |
+
"meteor": 0.5402472477605571,
|
| 9 |
+
"bleu": 49.617399302698296,
|
| 10 |
+
"exact_match": 0.4169941060903733,
|
| 11 |
+
"mean_text_metric": 16.88489194650828,
|
| 12 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"drop_rate_p": 0.3,
|
| 16 |
"density": 0.7,
|
| 17 |
"checkpoint_kept": true,
|
| 18 |
+
"rougeL": 0.49710649238976634,
|
| 19 |
+
"meteor": 0.5407666616360554,
|
| 20 |
+
"bleu": 49.78977109224581,
|
| 21 |
+
"exact_match": 0.4199410609037328,
|
| 22 |
+
"mean_text_metric": 16.942548082090543,
|
| 23 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"drop_rate_p": 0.5,
|
| 27 |
"density": 0.5,
|
| 28 |
"checkpoint_kept": true,
|
| 29 |
+
"rougeL": 0.49744879050858737,
|
| 30 |
+
"meteor": 0.5403587306686749,
|
| 31 |
+
"bleu": 49.815553566088504,
|
| 32 |
+
"exact_match": 0.41895874263261296,
|
| 33 |
+
"mean_text_metric": 16.95112036242192,
|
| 34 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"drop_rate_p": 0.7,
|
| 38 |
"density": 0.3,
|
| 39 |
+
"checkpoint_kept": false,
|
| 40 |
+
"rougeL": 0.494856579197801,
|
| 41 |
+
"meteor": 0.5383071027351187,
|
| 42 |
+
"bleu": 49.157171598741805,
|
| 43 |
+
"exact_match": 0.4155206286836935,
|
| 44 |
+
"mean_text_metric": 16.730111760224908,
|
| 45 |
+
"model_path": "temporary merge output removed after evaluation"
|
| 46 |
}
|
| 47 |
],
|
| 48 |
"selected": {
|
| 49 |
+
"drop_rate_p": 0.5,
|
| 50 |
+
"density": 0.5,
|
| 51 |
"checkpoint_kept": true,
|
| 52 |
+
"rougeL": 0.49744879050858737,
|
| 53 |
+
"meteor": 0.5403587306686749,
|
| 54 |
+
"bleu": 49.815553566088504,
|
| 55 |
+
"exact_match": 0.41895874263261296,
|
| 56 |
+
"mean_text_metric": 16.95112036242192,
|
| 57 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 58 |
},
|
| 59 |
"best_model_path": "/root/SafeGenAI/work/models/model_sft_dare",
|
part1_sft_metadata.json
CHANGED
|
@@ -25,7 +25,7 @@
|
|
| 25 |
"up_proj",
|
| 26 |
"down_proj"
|
| 27 |
],
|
| 28 |
-
"train_runtime_seconds":
|
| 29 |
-
"train_samples_per_second":
|
| 30 |
"adapter_dir": "/root/SafeGenAI/work/models/model_sft_lora_adapter"
|
| 31 |
}
|
|
|
|
| 25 |
"up_proj",
|
| 26 |
"down_proj"
|
| 27 |
],
|
| 28 |
+
"train_runtime_seconds": 999.5917,
|
| 29 |
+
"train_samples_per_second": 18.325,
|
| 30 |
"adapter_dir": "/root/SafeGenAI/work/models/model_sft_lora_adapter"
|
| 31 |
}
|
part2_harmful_train_metadata.json
CHANGED
|
@@ -22,5 +22,5 @@
|
|
| 22 |
"down_proj"
|
| 23 |
],
|
| 24 |
"adapter_dir": "/root/SafeGenAI/work/models/model_harmful_lora_adapter",
|
| 25 |
-
"train_runtime_seconds":
|
| 26 |
}
|
|
|
|
| 22 |
"down_proj"
|
| 23 |
],
|
| 24 |
"adapter_dir": "/root/SafeGenAI/work/models/model_harmful_lora_adapter",
|
| 25 |
+
"train_runtime_seconds": 116.0994
|
| 26 |
}
|
part3_lambda_sweep.json
CHANGED
|
@@ -41,10 +41,10 @@
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"lambda": 1.0,
|
| 44 |
-
"unsafe_score":
|
| 45 |
-
"harmful_count":
|
| 46 |
"unresolved_count": 0,
|
| 47 |
-
"not_harmful_count":
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"lambda": 1.5,
|
|
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"lambda": 1.0,
|
| 44 |
+
"unsafe_score": 1.0,
|
| 45 |
+
"harmful_count": 40,
|
| 46 |
"unresolved_count": 0,
|
| 47 |
+
"not_harmful_count": 0
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"lambda": 1.5,
|
part4_comparison_summary.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"lowest_unsafe_score_variant": "
|
| 3 |
-
"lowest_unsafe_score": 0.
|
| 4 |
"best_mean_utility_variant": "model_sft_dare",
|
| 5 |
-
"best_mean_utility": 17.
|
| 6 |
"selected_lambdas": {
|
| 7 |
"model_sft_lora": {
|
| 8 |
"lambda": 1.0,
|
|
@@ -20,14 +20,14 @@
|
|
| 20 |
}
|
| 21 |
},
|
| 22 |
"selected_dare": {
|
| 23 |
-
"drop_rate_p": 0.
|
| 24 |
-
"density": 0.
|
| 25 |
"checkpoint_kept": true,
|
| 26 |
-
"rougeL": 0.
|
| 27 |
-
"meteor": 0.
|
| 28 |
-
"bleu":
|
| 29 |
-
"exact_match": 0.
|
| 30 |
-
"mean_text_metric":
|
| 31 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 32 |
}
|
| 33 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"lowest_unsafe_score_variant": "model_sft_dare_fv",
|
| 3 |
+
"lowest_unsafe_score": 0.9490909090909091,
|
| 4 |
"best_mean_utility_variant": "model_sft_dare",
|
| 5 |
+
"best_mean_utility": 17.160792085481667,
|
| 6 |
"selected_lambdas": {
|
| 7 |
"model_sft_lora": {
|
| 8 |
"lambda": 1.0,
|
|
|
|
| 20 |
}
|
| 21 |
},
|
| 22 |
"selected_dare": {
|
| 23 |
+
"drop_rate_p": 0.5,
|
| 24 |
+
"density": 0.5,
|
| 25 |
"checkpoint_kept": true,
|
| 26 |
+
"rougeL": 0.49744879050858737,
|
| 27 |
+
"meteor": 0.5403587306686749,
|
| 28 |
+
"bleu": 49.815553566088504,
|
| 29 |
+
"exact_match": 0.41895874263261296,
|
| 30 |
+
"mean_text_metric": 16.95112036242192,
|
| 31 |
"model_path": "/root/SafeGenAI/work/models/model_sft_dare"
|
| 32 |
}
|
| 33 |
}
|
part4_safety_results.json
CHANGED
|
@@ -19,11 +19,11 @@
|
|
| 19 |
},
|
| 20 |
{
|
| 21 |
"variant": "model_sft_dare",
|
| 22 |
-
"harmful_count":
|
| 23 |
"unresolved_count": 0,
|
| 24 |
-
"not_harmful_count":
|
| 25 |
"total": 550,
|
| 26 |
-
"unsafe_score": 0.
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"variant": "model_sft_resta",
|
|
@@ -35,11 +35,11 @@
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"variant": "model_sft_dare_resta",
|
| 38 |
-
"harmful_count":
|
| 39 |
"unresolved_count": 1,
|
| 40 |
-
"not_harmful_count":
|
| 41 |
"total": 550,
|
| 42 |
-
"unsafe_score": 0.
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"variant": "model_sft_lora_fv",
|
|
@@ -51,11 +51,11 @@
|
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"variant": "model_sft_dare_fv",
|
| 54 |
-
"harmful_count":
|
| 55 |
"unresolved_count": 0,
|
| 56 |
-
"not_harmful_count":
|
| 57 |
"total": 550,
|
| 58 |
-
"unsafe_score": 0.
|
| 59 |
}
|
| 60 |
]
|
| 61 |
}
|
|
|
|
| 19 |
},
|
| 20 |
{
|
| 21 |
"variant": "model_sft_dare",
|
| 22 |
+
"harmful_count": 530,
|
| 23 |
"unresolved_count": 0,
|
| 24 |
+
"not_harmful_count": 20,
|
| 25 |
"total": 550,
|
| 26 |
+
"unsafe_score": 0.9636363636363636
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"variant": "model_sft_resta",
|
|
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"variant": "model_sft_dare_resta",
|
| 38 |
+
"harmful_count": 538,
|
| 39 |
"unresolved_count": 1,
|
| 40 |
+
"not_harmful_count": 11,
|
| 41 |
"total": 550,
|
| 42 |
+
"unsafe_score": 0.9781818181818182
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"variant": "model_sft_lora_fv",
|
|
|
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"variant": "model_sft_dare_fv",
|
| 54 |
+
"harmful_count": 522,
|
| 55 |
"unresolved_count": 0,
|
| 56 |
+
"not_harmful_count": 28,
|
| 57 |
"total": 550,
|
| 58 |
+
"unsafe_score": 0.9490909090909091
|
| 59 |
}
|
| 60 |
]
|
| 61 |
}
|
part4_utility_results.json
CHANGED
|
@@ -14,9 +14,9 @@
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"variant": "model_sft_dare",
|
| 17 |
-
"rougeL": 0.
|
| 18 |
-
"meteor": 0.
|
| 19 |
-
"bleu": 50.
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"variant": "model_sft_resta",
|
|
@@ -26,9 +26,9 @@
|
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"variant": "model_sft_dare_resta",
|
| 29 |
-
"rougeL": 0.
|
| 30 |
-
"meteor": 0.
|
| 31 |
-
"bleu": 50.
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"variant": "model_sft_lora_fv",
|
|
@@ -38,9 +38,9 @@
|
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"variant": "model_sft_dare_fv",
|
| 41 |
-
"rougeL": 0.
|
| 42 |
-
"meteor": 0.
|
| 43 |
-
"bleu":
|
| 44 |
}
|
| 45 |
]
|
| 46 |
}
|
|
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"variant": "model_sft_dare",
|
| 17 |
+
"rougeL": 0.4947146079909398,
|
| 18 |
+
"meteor": 0.5392367923348791,
|
| 19 |
+
"bleu": 50.44842485611918
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"variant": "model_sft_resta",
|
|
|
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"variant": "model_sft_dare_resta",
|
| 29 |
+
"rougeL": 0.4972108904469249,
|
| 30 |
+
"meteor": 0.5415584297493634,
|
| 31 |
+
"bleu": 50.26842101648165
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"variant": "model_sft_lora_fv",
|
|
|
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"variant": "model_sft_dare_fv",
|
| 41 |
+
"rougeL": 0.4856199136601769,
|
| 42 |
+
"meteor": 0.5310546393218359,
|
| 43 |
+
"bleu": 49.52422131686529
|
| 44 |
}
|
| 45 |
]
|
| 46 |
}
|