mah04 committed on
Commit
71aa2e6
·
verified ·
1 Parent(s): e36a11a

Recompute metrics batch 1

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. Baseline/ArabicHard/1000/metrics_test_new.json +7 -0
  2. Baseline/ArabicHard/1000/results_test_with_metrics_new.csv +0 -0
  3. Baseline/ArabicHard/350/metrics_test_new.json +7 -0
  4. Baseline/ArabicHard/350/results_test_with_metrics_new.csv +0 -0
  5. Baseline/ArabicHard/50/metrics_test_new.json +7 -0
  6. Baseline/ArabicHard/50/results_test_with_metrics_new.csv +0 -0
  7. Baseline/ChineseHard/1000/metrics_test_new.json +7 -0
  8. Baseline/ChineseHard/1000/results_test_with_metrics_new.csv +0 -0
  9. Baseline/ChineseHard/350/metrics_test_new.json +7 -0
  10. Baseline/ChineseHard/350/results_test_with_metrics_new.csv +0 -0
  11. Baseline/ChineseHard/50/metrics_test_new.json +7 -0
  12. Baseline/ChineseHard/50/results_test_with_metrics_new.csv +0 -0
  13. Baseline/EnglishEasy/1000/metrics_test_new.json +7 -0
  14. Baseline/EnglishEasy/1000/results_test_with_metrics_new.csv +0 -0
  15. Baseline/EnglishEasy/350/metrics_test_new.json +7 -0
  16. Baseline/EnglishEasy/350/results_test_with_metrics_new.csv +0 -0
  17. Baseline/EnglishEasy/50/metrics_test_new.json +7 -0
  18. Baseline/EnglishEasy/50/results_test_with_metrics_new.csv +0 -0
  19. Baseline/EnglishHard/1000/metrics_test_new.json +7 -0
  20. Baseline/EnglishHard/1000/results_test_with_metrics_new.csv +0 -0
  21. Baseline/EnglishHard/350/metrics_test_new.json +7 -0
  22. Baseline/EnglishHard/350/results_test_with_metrics_new.csv +0 -0
  23. Baseline/EnglishHard/50/metrics_test_new.json +7 -0
  24. Baseline/EnglishHard/50/results_test_with_metrics_new.csv +0 -0
  25. Baseline/Mix/1000/metrics_test_new.json +7 -0
  26. Baseline/Mix/1000/results_test_with_metrics_new.csv +0 -0
  27. Baseline/Mix/350/metrics_test_new.json +7 -0
  28. Baseline/Mix/350/results_test_with_metrics_new.csv +0 -0
  29. Baseline/Mix/50/metrics_test_new.json +7 -0
  30. Baseline/Mix/50/results_test_with_metrics_new.csv +0 -0
  31. Baseline/fuzzy/Arabic/1000/metrics_test_new.json +7 -0
  32. Baseline/fuzzy/Arabic/1000/results_test_with_metrics_new.csv +0 -0
  33. Baseline/fuzzy/Arabic/350/metrics_test_new.json +7 -0
  34. Baseline/fuzzy/Arabic/350/results_test_with_metrics_new.csv +0 -0
  35. Baseline/fuzzy/Arabic/50/metrics_test_new.json +7 -0
  36. Baseline/fuzzy/Arabic/50/results_test_with_metrics_new.csv +0 -0
  37. Baseline/fuzzy/Chinese/1000/metrics_test_new.json +7 -0
  38. Baseline/fuzzy/Chinese/1000/results_test_with_metrics_new.csv +0 -0
  39. Baseline/fuzzy/Chinese/350/metrics_test_new.json +7 -0
  40. Baseline/fuzzy/Chinese/350/results_test_with_metrics_new.csv +0 -0
  41. Baseline/fuzzy/Chinese/50/metrics_test_new.json +7 -0
  42. Baseline/fuzzy/Chinese/50/results_test_with_metrics_new.csv +0 -0
  43. Baseline/fuzzy/EnglishEasy/1000/metrics_test_new.json +7 -0
  44. Baseline/fuzzy/EnglishEasy/1000/results_test_with_metrics_new.csv +0 -0
  45. Baseline/fuzzy/EnglishEasy/350/metrics_test_new.json +7 -0
  46. Baseline/fuzzy/EnglishEasy/350/results_test_with_metrics_new.csv +0 -0
  47. Baseline/fuzzy/EnglishEasy/50/metrics_test_new.json +7 -0
  48. Baseline/fuzzy/EnglishEasy/50/results_test_with_metrics_new.csv +0 -0
  49. Baseline/fuzzy/EnglishHard/1000/metrics_test_new.json +7 -0
  50. Baseline/fuzzy/EnglishHard/1000/results_test_with_metrics_new.csv +0 -0
Baseline/ArabicHard/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.6228265264860493,
10
  "f1": 0.5134166666666667
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.18661544682571776,
13
  "domain_accuracy": 0.7954133977066988,
14
  "general_accuracy": 0.8315383052225157,
 
9
  "recall": 0.6228265264860493,
10
  "f1": 0.5134166666666667
11
  },
12
+ "span_level": {
13
+ "precision": 0.37198752480861924,
14
+ "recall": 0.5305297209866559,
15
+ "f1": 0.43733333333333335,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.43733333333333335,
19
  "spriv": 0.18661544682571776,
20
  "domain_accuracy": 0.7954133977066988,
21
  "general_accuracy": 0.8315383052225157,
Baseline/ArabicHard/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/ArabicHard/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.32573632538569425,
10
  "f1": 0.301819363222872
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.4905329593267882,
13
  "domain_accuracy": 0.38765603328710124,
14
  "general_accuracy": 0.6340425531914894,
 
9
  "recall": 0.32573632538569425,
10
  "f1": 0.301819363222872
11
  },
12
+ "span_level": {
13
+ "precision": 0.1643462469733656,
14
+ "recall": 0.19039270687237025,
15
+ "f1": 0.17641325536062377,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.17641325536062377,
19
  "spriv": 0.4905329593267882,
20
  "domain_accuracy": 0.38765603328710124,
21
  "general_accuracy": 0.6340425531914894,
Baseline/ArabicHard/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/ArabicHard/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.17892644135188868,
10
  "f1": 0.11313639220615965
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.4990059642147117,
13
  "domain_accuracy": 0.540650406504065,
14
  "general_accuracy": 0.46303501945525294,
 
9
  "recall": 0.17892644135188868,
10
  "f1": 0.11313639220615965
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.4990059642147117,
20
  "domain_accuracy": 0.540650406504065,
21
  "general_accuracy": 0.46303501945525294,
Baseline/ArabicHard/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/ChineseHard/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.5430338087164656,
10
  "f1": 0.5184973575203542
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.08018350453774808,
13
  "domain_accuracy": 0.8504306969459671,
14
  "general_accuracy": 0.9918682659077048,
 
9
  "recall": 0.5430338087164656,
10
  "f1": 0.5184973575203542
11
  },
12
+ "span_level": {
13
+ "precision": 0.48114067055393583,
14
+ "recall": 0.5266779694823975,
15
+ "f1": 0.5028805408751131,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.5028805408751131,
19
  "spriv": 0.08018350453774808,
20
  "domain_accuracy": 0.8504306969459671,
21
  "general_accuracy": 0.9918682659077048,
Baseline/ChineseHard/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/ChineseHard/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.037492844876931886,
10
  "f1": 0.0061925358670732
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.0,
13
  "domain_accuracy": 1.0,
14
  "general_accuracy": 1.0,
 
9
  "recall": 0.037492844876931886,
10
  "f1": 0.0061925358670732
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.0,
20
  "domain_accuracy": 1.0,
21
  "general_accuracy": 1.0,
Baseline/ChineseHard/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/ChineseHard/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.41914893617021276,
10
  "f1": 0.37381404174573063
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.06382978723404255,
13
  "domain_accuracy": 0.9641434262948207,
14
  "general_accuracy": 0.9041095890410958,
 
9
  "recall": 0.41914893617021276,
10
  "f1": 0.37381404174573063
11
  },
12
+ "span_level": {
13
+ "precision": 0.3493150684931507,
14
+ "recall": 0.4340425531914894,
15
+ "f1": 0.3870967741935484,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.3870967741935484,
19
  "spriv": 0.06382978723404255,
20
  "domain_accuracy": 0.9641434262948207,
21
  "general_accuracy": 0.9041095890410958,
Baseline/ChineseHard/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/EnglishEasy/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.3649923017705928,
10
  "f1": 0.40714899098325463
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.28954965357967666,
13
  "domain_accuracy": 0.48228713486637664,
14
  "general_accuracy": 0.9083557951482479,
 
9
  "recall": 0.3649923017705928,
10
  "f1": 0.40714899098325463
11
  },
12
+ "span_level": {
13
+ "precision": 0.4071601941747573,
14
+ "recall": 0.32284449576597385,
15
+ "f1": 0.3601331043366252,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.3601331043366252,
19
  "spriv": 0.28954965357967666,
20
  "domain_accuracy": 0.48228713486637664,
21
  "general_accuracy": 0.9083557951482479,
Baseline/EnglishEasy/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/EnglishEasy/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.47451963241436923,
10
  "f1": 0.3164051620090985
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.10665552770815928,
13
  "domain_accuracy": 0.8769438810006761,
14
  "general_accuracy": 0.9048295454545454,
 
9
  "recall": 0.47451963241436923,
10
  "f1": 0.3164051620090985
11
  },
12
+ "span_level": {
13
+ "precision": 0.017827298050139277,
14
+ "recall": 0.03564466722361459,
15
+ "f1": 0.023767523906786745,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.023767523906786745,
19
  "spriv": 0.10665552770815928,
20
  "domain_accuracy": 0.8769438810006761,
21
  "general_accuracy": 0.9048295454545454,
Baseline/EnglishEasy/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/EnglishEasy/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.46255506607929514,
10
  "f1": 0.1586502140518761
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.09397944199706314,
13
  "domain_accuracy": 0.9243697478991597,
14
  "general_accuracy": 0.8961625282167043,
 
9
  "recall": 0.46255506607929514,
10
  "f1": 0.1586502140518761
11
  },
12
+ "span_level": {
13
+ "precision": 0.0060790273556231,
14
+ "recall": 0.02936857562408223,
15
+ "f1": 0.010073029463611181,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.010073029463611181,
19
  "spriv": 0.09397944199706314,
20
  "domain_accuracy": 0.9243697478991597,
21
  "general_accuracy": 0.8961625282167043,
Baseline/EnglishEasy/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/EnglishHard/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.3670631855399351,
10
  "f1": 0.43264897346019027
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3542406921056697,
13
  "domain_accuracy": 0.38188647746243737,
14
  "general_accuracy": 0.8008339465293107,
 
9
  "recall": 0.3670631855399351,
10
  "f1": 0.43264897346019027
11
  },
12
+ "span_level": {
13
+ "precision": 0.3734619221815763,
14
+ "recall": 0.260234821566507,
15
+ "f1": 0.30673282651249595,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.30673282651249595,
19
  "spriv": 0.3542406921056697,
20
  "domain_accuracy": 0.38188647746243737,
21
  "general_accuracy": 0.8008339465293107,
Baseline/EnglishHard/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/EnglishHard/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.3416087388282026,
10
  "f1": 0.31915880624710063
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.23535253227408143,
13
  "domain_accuracy": 0.6299472295514512,
14
  "general_accuracy": 0.9003322259136213,
 
9
  "recall": 0.3416087388282026,
10
  "f1": 0.31915880624710063
11
  },
12
+ "span_level": {
13
+ "precision": 0.0803830528148578,
14
+ "recall": 0.09169149288315127,
15
+ "f1": 0.08566568733570434,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.08566568733570434,
19
  "spriv": 0.23535253227408143,
20
  "domain_accuracy": 0.6299472295514512,
21
  "general_accuracy": 0.9003322259136213,
Baseline/EnglishHard/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/EnglishHard/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.3154471544715447,
10
  "f1": 0.15476665337056242
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.2845528455284553,
13
  "domain_accuracy": 0.7673469387755102,
14
  "general_accuracy": 0.6810810810810811,
 
9
  "recall": 0.3154471544715447,
10
  "f1": 0.15476665337056242
11
  },
12
+ "span_level": {
13
+ "precision": 0.007399577167019027,
14
+ "recall": 0.022764227642276424,
15
+ "f1": 0.011168727562824091,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.011168727562824091,
19
  "spriv": 0.2845528455284553,
20
  "domain_accuracy": 0.7673469387755102,
21
  "general_accuracy": 0.6810810810810811,
Baseline/EnglishHard/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/Mix/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.3889985005129824,
10
  "f1": 0.3830878638324331
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.2861652592534133,
13
  "domain_accuracy": 0.5688819390148554,
14
  "general_accuracy": 0.8615360101975781,
 
9
  "recall": 0.3889985005129824,
10
  "f1": 0.3830878638324331
11
  },
12
+ "span_level": {
13
+ "precision": 0.241080998315725,
14
+ "recall": 0.24852024307473758,
15
+ "f1": 0.24474410290288734,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.24474410290288734,
19
  "spriv": 0.2861652592534133,
20
  "domain_accuracy": 0.5688819390148554,
21
  "general_accuracy": 0.8615360101975781,
Baseline/Mix/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/Mix/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.39167120304844855,
10
  "f1": 0.24881127345033288
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.2476864452912357,
13
  "domain_accuracy": 0.6790314270994333,
14
  "general_accuracy": 0.8343912290825158,
 
9
  "recall": 0.39167120304844855,
10
  "f1": 0.24881127345033288
11
  },
12
+ "span_level": {
13
+ "precision": 0.13581654630685416,
14
+ "recall": 0.29178007621121393,
15
+ "f1": 0.18535488890810062,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.18535488890810062,
19
  "spriv": 0.2476864452912357,
20
  "domain_accuracy": 0.6790314270994333,
21
  "general_accuracy": 0.8343912290825158,
Baseline/Mix/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/Mix/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.008025682182985553,
10
  "f1": 0.0027337342810278844
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.0,
13
  "domain_accuracy": 1.0,
14
  "general_accuracy": 1.0,
 
9
  "recall": 0.008025682182985553,
10
  "f1": 0.0027337342810278844
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.0,
20
  "domain_accuracy": 1.0,
21
  "general_accuracy": 1.0,
Baseline/Mix/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/Arabic/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.44793772745653054,
10
  "f1": 0.5088716623600344
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.2431257581884351,
13
  "domain_accuracy": 0.6751156708911688,
14
  "general_accuracy": 0.83946352367405,
 
9
  "recall": 0.44793772745653054,
10
  "f1": 0.5088716623600344
11
  },
12
+ "span_level": {
13
+ "precision": 0.9436395055164163,
14
+ "recall": 0.7176506267691064,
15
+ "f1": 0.8152741889175998,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.8152741889175998,
19
  "spriv": 0.2431257581884351,
20
  "domain_accuracy": 0.6751156708911688,
21
  "general_accuracy": 0.83946352367405,
Baseline/fuzzy/Arabic/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/Arabic/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.5014025245441796,
10
  "f1": 0.5629921259842519
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.23387096774193547,
13
  "domain_accuracy": 0.7302357836338419,
14
  "general_accuracy": 0.8028368794326242,
 
9
  "recall": 0.5014025245441796,
10
  "f1": 0.5629921259842519
11
  },
12
+ "span_level": {
13
+ "precision": 0.9111310592459605,
14
+ "recall": 0.711781206171108,
15
+ "f1": 0.7992125984251968,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.7992125984251968,
19
  "spriv": 0.23387096774193547,
20
  "domain_accuracy": 0.7302357836338419,
21
  "general_accuracy": 0.8028368794326242,
Baseline/fuzzy/Arabic/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/Arabic/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.5188866799204771,
10
  "f1": 0.5925085130533484
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.2504970178926441,
13
  "domain_accuracy": 0.7764227642276422,
14
  "general_accuracy": 0.7237354085603113,
 
9
  "recall": 0.5188866799204771,
10
  "f1": 0.5925085130533484
11
  },
12
+ "span_level": {
13
+ "precision": 0.9259259259259259,
14
+ "recall": 0.6958250497017893,
15
+ "f1": 0.7945516458569807,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.7945516458569807,
19
  "spriv": 0.2504970178926441,
20
  "domain_accuracy": 0.7764227642276422,
21
  "general_accuracy": 0.7237354085603113,
Baseline/fuzzy/Arabic/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/Chinese/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.4229580133639174,
10
  "f1": 0.3777500668032422
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.08377381071108009,
13
  "domain_accuracy": 0.9048551292090838,
14
  "general_accuracy": 0.9280341532831876,
 
9
  "recall": 0.4229580133639174,
10
  "f1": 0.3777500668032422
11
  },
12
+ "span_level": {
13
+ "precision": 0.4347791100024141,
14
+ "recall": 0.5388451181809115,
15
+ "f1": 0.4812505566936849,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.4812505566936849,
19
  "spriv": 0.08377381071108009,
20
  "domain_accuracy": 0.9048551292090838,
21
  "general_accuracy": 0.9280341532831876,
Baseline/fuzzy/Chinese/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/Chinese/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.3033772180881511,
10
  "f1": 0.39969834087481143
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.48454493417286776,
13
  "domain_accuracy": 0.4910344827586207,
14
  "general_accuracy": 0.5327788649706457,
 
9
  "recall": 0.3033772180881511,
10
  "f1": 0.39969834087481143
11
  },
12
+ "span_level": {
13
+ "precision": 0.980110497237569,
14
+ "recall": 0.5077275329135661,
15
+ "f1": 0.6689291101055806,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.6689291101055806,
19
  "spriv": 0.48454493417286776,
20
  "domain_accuracy": 0.4910344827586207,
21
  "general_accuracy": 0.5327788649706457,
Baseline/fuzzy/Chinese/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/Chinese/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.4531914893617021,
10
  "f1": 0.556135770234987
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3702127659574468,
13
  "domain_accuracy": 0.6932270916334662,
14
  "general_accuracy": 0.5570776255707762,
 
9
  "recall": 0.4531914893617021,
10
  "f1": 0.556135770234987
11
  },
12
+ "span_level": {
13
+ "precision": 1.0,
14
+ "recall": 0.6297872340425532,
15
+ "f1": 0.7728459530026109,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.7728459530026109,
19
  "spriv": 0.3702127659574468,
20
  "domain_accuracy": 0.6932270916334662,
21
  "general_accuracy": 0.5570776255707762,
Baseline/fuzzy/Chinese/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/EnglishEasy/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.2544264819091609,
10
  "f1": 0.3032457850670949
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3286181678214011,
13
  "domain_accuracy": 0.4520406049305987,
14
  "general_accuracy": 0.8616352201257862,
 
9
  "recall": 0.2544264819091609,
10
  "f1": 0.3032457850670949
11
  },
12
+ "span_level": {
13
+ "precision": 0.9261992619926199,
14
+ "recall": 0.6279830638953041,
15
+ "f1": 0.7484803303131092,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.7484803303131092,
19
  "spriv": 0.3286181678214011,
20
  "domain_accuracy": 0.4520406049305987,
21
  "general_accuracy": 0.8616352201257862,
Baseline/fuzzy/EnglishEasy/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/EnglishEasy/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.5346700083542189,
10
  "f1": 0.5922270203578038
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.19910888331940962,
13
  "domain_accuracy": 0.8235294117647058,
14
  "general_accuracy": 0.7850378787878788,
 
9
  "recall": 0.5346700083542189,
10
  "f1": 0.5922270203578038
11
  },
12
+ "span_level": {
13
+ "precision": 0.9433114414103008,
14
+ "recall": 0.7599554441659705,
15
+ "f1": 0.8417643429981494,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.8417643429981494,
19
  "spriv": 0.19910888331940962,
20
  "domain_accuracy": 0.8235294117647058,
21
  "general_accuracy": 0.7850378787878788,
Baseline/fuzzy/EnglishEasy/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/EnglishEasy/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.591776798825257,
10
  "f1": 0.6984402079722704
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3054331864904552,
13
  "domain_accuracy": 0.957983193277311,
14
  "general_accuracy": 0.5530474040632054,
 
9
  "recall": 0.591776798825257,
10
  "f1": 0.6984402079722704
11
  },
12
+ "span_level": {
13
+ "precision": 0.9894291754756871,
14
+ "recall": 0.6872246696035242,
15
+ "f1": 0.8110918544194107,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.8110918544194107,
19
  "spriv": 0.3054331864904552,
20
  "domain_accuracy": 0.957983193277311,
21
  "general_accuracy": 0.5530474040632054,
Baseline/fuzzy/EnglishEasy/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Baseline/fuzzy/EnglishHard/1000/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.42522786961223547,
10
  "f1": 0.4754707203316635
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.22539780627220762,
13
  "domain_accuracy": 0.5300500834724541,
14
  "general_accuracy": 0.9183222958057395,
 
9
  "recall": 0.42522786961223547,
10
  "f1": 0.4754707203316635
11
  },
12
+ "span_level": {
13
+ "precision": 0.9123408423114594,
14
+ "recall": 0.7195272671095319,
15
+ "f1": 0.8045430989808257,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.8045430989808257,
19
  "spriv": 0.22539780627220762,
20
  "domain_accuracy": 0.5300500834724541,
21
  "general_accuracy": 0.9183222958057395,
Baseline/fuzzy/EnglishHard/1000/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff