cmndcntrlcyber committed on
Commit
d8a0038
·
verified ·
1 Parent(s): e977b42

Phase 3 eval: baseline + finetuned metrics

Browse files
Files changed (3) hide show
  1. eval/baseline.json +3 -3
  2. eval/finetuned.json +3 -3
  3. eval/summary.json +7 -7
eval/baseline.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "exact_match": 0.0,
3
  "bleu_4": 0.0,
4
- "mean_edit_similarity": 0.06603700665878295,
5
- "num_samples": 20,
6
- "syntax_valid_rate": 0.2,
7
  "run_name": "baseline"
8
  }
 
1
  {
2
  "exact_match": 0.0,
3
  "bleu_4": 0.0,
4
+ "mean_edit_similarity": 0.03815683829552613,
5
+ "num_samples": 200,
6
+ "syntax_valid_rate": 0.195,
7
  "run_name": "baseline"
8
  }
eval/finetuned.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "exact_match": 0.0,
3
  "bleu_4": 0.0,
4
- "mean_edit_similarity": 0.023189852963776902,
5
- "num_samples": 20,
6
- "syntax_valid_rate": 0.65,
7
  "run_name": "finetuned"
8
  }
 
1
  {
2
  "exact_match": 0.0,
3
  "bleu_4": 0.0,
4
+ "mean_edit_similarity": 0.04458389402018659,
5
+ "num_samples": 200,
6
+ "syntax_valid_rate": 0.61,
7
  "run_name": "finetuned"
8
  }
eval/summary.json CHANGED
@@ -2,21 +2,21 @@
2
  "dataset": "cmndcntrlcyber/code-trainer-offsec-dataset@v2-multimodal",
3
  "adapter": "cmndcntrlcyber/code-trainer-vision-adapter",
4
  "split": "test",
5
- "num_samples": 20,
6
  "baseline": {
7
  "exact_match": 0.0,
8
  "bleu_4": 0.0,
9
- "mean_edit_similarity": 0.06603700665878295,
10
- "num_samples": 20,
11
- "syntax_valid_rate": 0.2,
12
  "run_name": "baseline"
13
  },
14
  "finetuned": {
15
  "exact_match": 0.0,
16
  "bleu_4": 0.0,
17
- "mean_edit_similarity": 0.023189852963776902,
18
- "num_samples": 20,
19
- "syntax_valid_rate": 0.65,
20
  "run_name": "finetuned"
21
  }
22
  }
 
2
  "dataset": "cmndcntrlcyber/code-trainer-offsec-dataset@v2-multimodal",
3
  "adapter": "cmndcntrlcyber/code-trainer-vision-adapter",
4
  "split": "test",
5
+ "num_samples": 200,
6
  "baseline": {
7
  "exact_match": 0.0,
8
  "bleu_4": 0.0,
9
+ "mean_edit_similarity": 0.03815683829552613,
10
+ "num_samples": 200,
11
+ "syntax_valid_rate": 0.195,
12
  "run_name": "baseline"
13
  },
14
  "finetuned": {
15
  "exact_match": 0.0,
16
  "bleu_4": 0.0,
17
+ "mean_edit_similarity": 0.04458389402018659,
18
+ "num_samples": 200,
19
+ "syntax_valid_rate": 0.61,
20
  "run_name": "finetuned"
21
  }
22
  }