mah04 commited on
Commit
bdb45b7
·
verified ·
1 Parent(s): cab0adb

Recompute metrics batch 5

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/metrics_test_new.json +7 -0
  2. Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/results_test_with_metrics_new.csv +0 -0
  3. Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/metrics_new.json +7 -0
  4. Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/results_with_metrics_new.csv +0 -0
  5. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/metrics_new.json +7 -0
  6. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/results_with_metrics_new.csv +0 -0
  7. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/metrics_new.json +7 -0
  8. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/results_with_metrics_new.csv +0 -0
  9. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/metrics_new.json +7 -0
  10. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/results_with_metrics_new.csv +0 -0
  11. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/metrics_new.json +7 -0
  12. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/results_with_metrics_new.csv +0 -0
  13. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/metrics_new.json +7 -0
  14. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/results_with_metrics_new.csv +0 -0
  15. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/metrics_new.json +7 -0
  16. Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/results_with_metrics_new.csv +0 -0
  17. Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_new.json +7 -0
  18. Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_test_new.json +7 -0
  19. Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_test_with_metrics_new.csv +0 -0
  20. Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_with_metrics_new.csv +0 -0
  21. Liquid350M/promptExperiments/EnglishHard/WithLabels/350/metrics_new.json +7 -0
  22. Liquid350M/promptExperiments/EnglishHard/WithLabels/350/results_with_metrics_new.csv +0 -0
  23. Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_new.json +7 -0
  24. Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_test_new.json +7 -0
  25. Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_test_with_metrics_new.csv +0 -0
  26. Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_with_metrics_new.csv +0 -0
  27. Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/metrics_new.json +7 -0
  28. Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/results_with_metrics_new.csv +0 -0
  29. Liquid350M/promptExperiments/EnglishHard/WithTerms/350/metrics_new.json +7 -0
  30. Liquid350M/promptExperiments/EnglishHard/WithTerms/350/results_with_metrics_new.csv +0 -0
  31. Liquid350M/promptExperiments/EnglishHard/WithTerms/50/metrics_new.json +7 -0
  32. Liquid350M/promptExperiments/EnglishHard/WithTerms/50/results_with_metrics_new.csv +0 -0
  33. Liquid350M/promptExperiments/Mix/Basic/1000/metrics_new.json +7 -0
  34. Liquid350M/promptExperiments/Mix/Basic/1000/results_with_metrics_new.csv +0 -0
  35. Liquid350M/promptExperiments/Mix/Basic/350/metrics_new.json +7 -0
  36. Liquid350M/promptExperiments/Mix/Basic/350/results_with_metrics_new.csv +0 -0
  37. Liquid350M/promptExperiments/Mix/Basic/50/metrics_new.json +7 -0
  38. Liquid350M/promptExperiments/Mix/Basic/50/results_with_metrics_new.csv +0 -0
  39. Liquid350M/promptExperiments/Mix/Mixed/1000/metrics_new.json +7 -0
  40. Liquid350M/promptExperiments/Mix/Mixed/1000/results_with_metrics_new.csv +0 -0
  41. Liquid350M/promptExperiments/Mix/Mixed/350/metrics_new.json +7 -0
  42. Liquid350M/promptExperiments/Mix/Mixed/350/results_with_metrics_new.csv +0 -0
  43. Liquid350M/promptExperiments/Mix/Mixed/50/metrics_new.json +7 -0
  44. Liquid350M/promptExperiments/Mix/Mixed/50/results_with_metrics_new.csv +0 -0
  45. Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/metrics_new.json +7 -0
  46. Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/results_with_metrics_new.csv +0 -0
  47. Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/metrics_test_new.json +7 -0
  48. Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/results_test_with_metrics_new.csv +0 -0
  49. Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/metrics_new.json +7 -0
  50. Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/results_with_metrics_new.csv +0 -0
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0943089430894309,
10
  "f1": 0.047366271947733775
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.37886178861788616,
13
  "domain_accuracy": 0.6857142857142857,
14
  "general_accuracy": 0.5783783783783784,
 
9
  "recall": 0.0943089430894309,
10
  "f1": 0.047366271947733775
11
  },
12
+ "span_level": {
13
+ "precision": 0.0010905125408942203,
14
+ "recall": 0.0032520325203252032,
15
+ "f1": 0.0016333197223356473,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0016333197223356473,
19
  "spriv": 0.37886178861788616,
20
  "domain_accuracy": 0.6857142857142857,
21
  "general_accuracy": 0.5783783783783784,
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.08660130718954248,
10
  "f1": 0.03996983408748115
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.12581699346405228,
13
  "domain_accuracy": 0.8844621513944223,
14
  "general_accuracy": 0.8670360110803325,
 
9
  "recall": 0.08660130718954248,
10
  "f1": 0.03996983408748115
11
  },
12
+ "span_level": {
13
+ "precision": 0.0029411764705882353,
14
+ "recall": 0.00980392156862745,
15
+ "f1": 0.004524886877828055,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.004524886877828055,
19
  "spriv": 0.12581699346405228,
20
  "domain_accuracy": 0.8844621513944223,
21
  "general_accuracy": 0.8670360110803325,
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.06006847183317771,
10
  "f1": 0.025307326667759387
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.06255835667600373,
13
  "domain_accuracy": 0.9220389805097451,
14
  "general_accuracy": 0.9462299890015886,
 
9
  "recall": 0.06006847183317771,
10
  "f1": 0.025307326667759387
11
  },
12
+ "span_level": {
13
+ "precision": 0.000809834295444163,
14
+ "recall": 0.003034547152194211,
15
+ "f1": 0.0012784789378790361,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0012784789378790361,
19
  "spriv": 0.06255835667600373,
20
  "domain_accuracy": 0.9220389805097451,
21
  "general_accuracy": 0.9462299890015886,
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03402061855670103,
10
  "f1": 0.014548126377663482
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.19415807560137457,
13
  "domain_accuracy": 0.7739251040221914,
14
  "general_accuracy": 0.8371934604904632,
 
9
  "recall": 0.03402061855670103,
10
  "f1": 0.014548126377663482
11
  },
12
+ "span_level": {
13
+ "precision": 0.0007476635514018691,
14
+ "recall": 0.0027491408934707906,
15
+ "f1": 0.0011756061719324026,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0011756061719324026,
19
  "spriv": 0.19415807560137457,
20
  "domain_accuracy": 0.7739251040221914,
21
  "general_accuracy": 0.8371934604904632,
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.07026143790849673,
10
  "f1": 0.03517382413087935
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.21568627450980393,
13
  "domain_accuracy": 0.8047808764940239,
14
  "general_accuracy": 0.7700831024930748,
 
9
  "recall": 0.07026143790849673,
10
  "f1": 0.03517382413087935
11
  },
12
+ "span_level": {
13
+ "precision": 0.0016366612111292963,
14
+ "recall": 0.004901960784313725,
15
+ "f1": 0.00245398773006135,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.00245398773006135,
19
  "spriv": 0.21568627450980393,
20
  "domain_accuracy": 0.8047808764940239,
21
  "general_accuracy": 0.7700831024930748,
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.05913476501711796,
10
  "f1": 0.0248276763255039
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.0647370059134765,
13
  "domain_accuracy": 0.9126151210109231,
14
  "general_accuracy": 0.9481852621288036,
 
9
  "recall": 0.05913476501711796,
10
  "f1": 0.0248276763255039
11
  },
12
+ "span_level": {
13
+ "precision": 0.0005995451726276618,
14
+ "recall": 0.00225645813881108,
15
+ "f1": 0.0009473718597889648,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0009473718597889648,
19
  "spriv": 0.0647370059134765,
20
  "domain_accuracy": 0.9126151210109231,
21
  "general_accuracy": 0.9481852621288036,
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.036426116838487975,
10
  "f1": 0.014436499829758257
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.1508591065292096,
13
  "domain_accuracy": 0.8259361997226075,
14
  "general_accuracy": 0.8719346049046321,
 
9
  "recall": 0.036426116838487975,
10
  "f1": 0.014436499829758257
11
  },
12
+ "span_level": {
13
+ "precision": 0.0007643312101910828,
14
+ "recall": 0.003092783505154639,
15
+ "f1": 0.0012257405515832482,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0012257405515832482,
19
  "spriv": 0.1508591065292096,
20
  "domain_accuracy": 0.8259361997226075,
21
  "general_accuracy": 0.8719346049046321,
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0718954248366013,
10
  "f1": 0.03471400394477318
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.16666666666666666,
13
  "domain_accuracy": 0.848605577689243,
14
  "general_accuracy": 0.8227146814404432,
 
9
  "recall": 0.0718954248366013,
10
  "f1": 0.03471400394477318
11
  },
12
+ "span_level": {
13
+ "precision": 0.0010400416016640667,
14
+ "recall": 0.0032679738562091504,
15
+ "f1": 0.0015779092702169627,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0015779092702169627,
19
  "spriv": 0.16666666666666666,
20
  "domain_accuracy": 0.848605577689243,
21
  "general_accuracy": 0.8227146814404432,
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.08823529411764706,
10
  "f1": 0.04761005101076894
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.03649237472766884,
13
  "domain_accuracy": 0.9601627757549797,
14
  "general_accuracy": 0.9654161065623854,
 
9
  "recall": 0.08823529411764706,
10
  "f1": 0.04761005101076894
11
  },
12
+ "span_level": {
13
+ "precision": 2.874802357337933e-05,
14
+ "recall": 7.78089013383131e-05,
15
+ "f1": 4.198417196716838e-05,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 4.198417196716838e-05,
19
  "spriv": 0.03649237472766884,
20
  "domain_accuracy": 0.9601627757549797,
21
  "general_accuracy": 0.9654161065623854,
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.07987023018693033,
10
  "f1": 0.0434618132907402
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.0326741850764715,
13
  "domain_accuracy": 0.9641068447412354,
14
  "general_accuracy": 0.9692175619327937,
 
9
  "recall": 0.07987023018693033,
10
  "f1": 0.0434618132907402
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.0326741850764715,
20
  "domain_accuracy": 0.9641068447412354,
21
  "general_accuracy": 0.9692175619327937,
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithLabels/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.031958762886597936,
10
  "f1": 0.014476961394769616
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.07628865979381444,
13
  "domain_accuracy": 0.926490984743412,
14
  "general_accuracy": 0.9209809264305178,
 
9
  "recall": 0.031958762886597936,
10
  "f1": 0.014476961394769616
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.07628865979381444,
20
  "domain_accuracy": 0.926490984743412,
21
  "general_accuracy": 0.9209809264305178,
Liquid350M/promptExperiments/EnglishHard/WithLabels/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.07352941176470588,
10
  "f1": 0.03802281368821293
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.03104575163398693,
13
  "domain_accuracy": 0.9721115537848606,
14
  "general_accuracy": 0.9667590027700831,
 
9
  "recall": 0.07352941176470588,
10
  "f1": 0.03802281368821293
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.03104575163398693,
20
  "domain_accuracy": 0.9721115537848606,
21
  "general_accuracy": 0.9667590027700831,
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.07642276422764227,
10
  "f1": 0.03924843423799583
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.013008130081300813,
13
  "domain_accuracy": 0.9918367346938776,
14
  "general_accuracy": 0.9837837837837838,
 
9
  "recall": 0.07642276422764227,
10
  "f1": 0.03924843423799583
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.013008130081300813,
20
  "domain_accuracy": 0.9918367346938776,
21
  "general_accuracy": 0.9837837837837838,
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03649237472766884,
10
  "f1": 0.05288082083662194
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.7595704948646125,
13
  "domain_accuracy": 0.12893553223388307,
14
  "general_accuracy": 0.3040449712819259,
 
9
  "recall": 0.03649237472766884,
10
  "f1": 0.05288082083662194
11
  },
12
+ "span_level": {
13
+ "precision": 0.0014326647564469914,
14
+ "recall": 0.0005446623093681918,
15
+ "f1": 0.0007892659826361484,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0007892659826361484,
19
  "spriv": 0.7595704948646125,
20
  "domain_accuracy": 0.12893553223388307,
21
  "general_accuracy": 0.3040449712819259,
Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithTerms/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.02302405498281787,
10
  "f1": 0.02667728449133984
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.7109965635738832,
13
  "domain_accuracy": 0.2447988904299584,
14
  "general_accuracy": 0.33242506811989103,
 
9
  "recall": 0.02302405498281787,
10
  "f1": 0.02667728449133984
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.7109965635738832,
20
  "domain_accuracy": 0.2447988904299584,
21
  "general_accuracy": 0.33242506811989103,
Liquid350M/promptExperiments/EnglishHard/WithTerms/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/EnglishHard/WithTerms/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.08333333333333333,
10
  "f1": 0.04283914321713566
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.049019607843137254,
13
  "domain_accuracy": 0.952191235059761,
14
  "general_accuracy": 0.9501385041551247,
 
9
  "recall": 0.08333333333333333,
10
  "f1": 0.04283914321713566
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.049019607843137254,
20
  "domain_accuracy": 0.952191235059761,
21
  "general_accuracy": 0.9501385041551247,
Liquid350M/promptExperiments/EnglishHard/WithTerms/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/Basic/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.07058545797922569,
10
  "f1": 0.04902309058614565
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3523764557758892,
13
  "domain_accuracy": 0.6231371946535567,
14
  "general_accuracy": 0.673334408775609,
 
9
  "recall": 0.07058545797922569,
10
  "f1": 0.04902309058614565
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.3523764557758892,
20
  "domain_accuracy": 0.6231371946535567,
21
  "general_accuracy": 0.673334408775609,
Liquid350M/promptExperiments/Mix/Basic/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/Basic/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0332522303325223,
10
  "f1": 0.02295204329165889
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.38686131386861317,
13
  "domain_accuracy": 0.6234225138818779,
14
  "general_accuracy": 0.6012805587892899,
 
9
  "recall": 0.0332522303325223,
10
  "f1": 0.02295204329165889
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.38686131386861317,
20
  "domain_accuracy": 0.6234225138818779,
21
  "general_accuracy": 0.6012805587892899,
Liquid350M/promptExperiments/Mix/Basic/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/Basic/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.06891025641025642,
10
  "f1": 0.037423846823324634
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.16185897435897437,
13
  "domain_accuracy": 0.8200589970501475,
14
  "general_accuracy": 0.8596491228070176,
 
9
  "recall": 0.06891025641025642,
10
  "f1": 0.037423846823324634
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.16185897435897437,
20
  "domain_accuracy": 0.8200589970501475,
21
  "general_accuracy": 0.8596491228070176,
Liquid350M/promptExperiments/Mix/Basic/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/Mixed/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.00039345294302801383,
10
  "f1": 0.0007320108337603396
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.9390934844192634,
13
  "domain_accuracy": 0.04685819634352435,
14
  "general_accuracy": 0.07565736409098242,
 
9
  "recall": 0.00039345294302801383,
10
  "f1": 0.0007320108337603396
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.9390934844192634,
20
  "domain_accuracy": 0.04685819634352435,
21
  "general_accuracy": 0.07565736409098242,
Liquid350M/promptExperiments/Mix/Mixed/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/Mixed/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0032441200324412004,
10
  "f1": 0.005934718100890209
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.943227899432279,
13
  "domain_accuracy": 0.057041898031297326,
14
  "general_accuracy": 0.056461001164144355,
 
9
  "recall": 0.0032441200324412004,
10
  "f1": 0.005934718100890209
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.943227899432279,
20
  "domain_accuracy": 0.057041898031297326,
21
  "general_accuracy": 0.056461001164144355,
Liquid350M/promptExperiments/Mix/Mixed/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/Mixed/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.01282051282051282,
10
  "f1": 0.013547840812870448
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.5641025641025641,
13
  "domain_accuracy": 0.471976401179941,
14
  "general_accuracy": 0.3929824561403509,
 
9
  "recall": 0.01282051282051282,
10
  "f1": 0.013547840812870448
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.5641025641025641,
20
  "domain_accuracy": 0.471976401179941,
21
  "general_accuracy": 0.3929824561403509,
Liquid350M/promptExperiments/Mix/Mixed/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.019751337740006297,
10
  "f1": 0.012231670768256134
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3051621026125275,
13
  "domain_accuracy": 0.6500230450145952,
14
  "general_accuracy": 0.7418938538473947,
 
9
  "recall": 0.019751337740006297,
10
  "f1": 0.012231670768256134
11
  },
12
+ "span_level": {
13
+ "precision": 0.0007058906575371475,
14
+ "recall": 0.0015738117721120553,
15
+ "f1": 0.0009746351209765845,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0009746351209765845,
19
  "spriv": 0.3051621026125275,
20
  "domain_accuracy": 0.6500230450145952,
21
  "general_accuracy": 0.7418938538473947,
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.032117583015786606,
10
  "f1": 0.02987719964552475
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.5982580293957539,
13
  "domain_accuracy": 0.3745492014425554,
14
  "general_accuracy": 0.432198499711483,
 
9
  "recall": 0.032117583015786606,
10
  "f1": 0.02987719964552475
11
  },
12
+ "span_level": {
13
+ "precision": 0.001893491124260355,
14
+ "recall": 0.002177463255307567,
15
+ "f1": 0.002025572857323712,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.002025572857323712,
19
  "spriv": 0.5982580293957539,
20
  "domain_accuracy": 0.3745492014425554,
21
  "general_accuracy": 0.432198499711483,
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.01567991349013247,
10
  "f1": 0.01033223479112853
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3560421735604217,
13
  "domain_accuracy": 0.6431095406360424,
14
  "general_accuracy": 0.6449359720605355,
 
9
  "recall": 0.01567991349013247,
10
  "f1": 0.01033223479112853
11
  },
12
+ "span_level": {
13
+ "precision": 0.00026567481402763017,
14
+ "recall": 0.0005406866720735334,
15
+ "f1": 0.00035628395831477686,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.00035628395831477686,
19
  "spriv": 0.3560421735604217,
20
  "domain_accuracy": 0.6431095406360424,
21
  "general_accuracy": 0.6449359720605355,
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff