cmndcntrlcyber's picture
Phase 3 eval: baseline + finetuned metrics
f823218 verified
raw
history blame contribute delete
568 Bytes
{
"dataset": "cmndcntrlcyber/code-trainer-offsec-dataset@v2-multimodal",
"adapter": "cmndcntrlcyber/code-trainer-vision-adapter",
"split": "test",
"num_samples": 200,
"baseline": {
"exact_match": 0.0,
"bleu_4": 0.0,
"mean_edit_similarity": 0.04701831394975743,
"num_samples": 200,
"syntax_valid_rate": 0.225,
"run_name": "baseline"
},
"finetuned": {
"exact_match": 0.0,
"bleu_4": 0.0,
"mean_edit_similarity": 0.04458389402018659,
"num_samples": 200,
"syntax_valid_rate": 0.61,
"run_name": "finetuned"
}
}