mah04 commited on
Commit
cab0adb
·
verified ·
1 Parent(s): cea6ddf

Recompute metrics batch 4

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Liquid350M/promptExperiments/ArabicHard/Basic/50/metrics_test_new.json +7 -0
  2. Liquid350M/promptExperiments/ArabicHard/Basic/50/results_test_with_metrics_new.csv +0 -0
  3. Liquid350M/promptExperiments/ArabicHard/Mixed/1000/metrics_new.json +7 -0
  4. Liquid350M/promptExperiments/ArabicHard/Mixed/1000/results_with_metrics_new.csv +0 -0
  5. Liquid350M/promptExperiments/ArabicHard/Mixed/350/metrics_new.json +7 -0
  6. Liquid350M/promptExperiments/ArabicHard/Mixed/350/results_with_metrics_new.csv +0 -0
  7. Liquid350M/promptExperiments/ArabicHard/Mixed/50/metrics_new.json +7 -0
  8. Liquid350M/promptExperiments/ArabicHard/Mixed/50/results_with_metrics_new.csv +0 -0
  9. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/metrics_new.json +7 -0
  10. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/results_with_metrics_new.csv +0 -0
  11. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/metrics_new.json +7 -0
  12. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/results_with_metrics_new.csv +0 -0
  13. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/metrics_new.json +7 -0
  14. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/results_with_metrics_new.csv +0 -0
  15. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/metrics_new.json +7 -0
  16. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/results_with_metrics_new.csv +0 -0
  17. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/metrics_new.json +7 -0
  18. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/results_with_metrics_new.csv +0 -0
  19. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/metrics_new.json +7 -0
  20. Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/results_with_metrics_new.csv +0 -0
  21. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/metrics_test_new.json +7 -0
  22. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/results_test_with_metrics_new.csv +0 -0
  23. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/metrics_new.json +7 -0
  24. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/results_with_metrics_new.csv +0 -0
  25. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/metrics_test_new.json +7 -0
  26. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/results_test_with_metrics_new.csv +0 -0
  27. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/metrics_new.json +7 -0
  28. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/results_with_metrics_new.csv +0 -0
  29. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/metrics_new.json +7 -0
  30. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/results_with_metrics_new.csv +0 -0
  31. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/metrics_new.json +7 -0
  32. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/results_with_metrics_new.csv +0 -0
  33. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/metrics_new.json +7 -0
  34. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/results_with_metrics_new.csv +0 -0
  35. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/metrics_test_new.json +7 -0
  36. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/results_test_with_metrics_new.csv +0 -0
  37. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/metrics_new.json +7 -0
  38. Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/results_with_metrics_new.csv +0 -0
  39. Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/metrics_new.json +7 -0
  40. Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/results_with_metrics_new.csv +0 -0
  41. Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_new.json +7 -0
  42. Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_test_new.json +7 -0
  43. Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_test_with_metrics_new.csv +0 -0
  44. Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_with_metrics_new.csv +0 -0
  45. Liquid350M/promptExperiments/ArabicHard/WithLabels/50/metrics_new.json +7 -0
  46. Liquid350M/promptExperiments/ArabicHard/WithLabels/50/results_with_metrics_new.csv +0 -0
  47. Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/metrics_new.json +7 -0
  48. Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/results_with_metrics_new.csv +0 -0
  49. Liquid350M/promptExperiments/ArabicHard/WithTerms/350/metrics_new.json +7 -0
  50. Liquid350M/promptExperiments/ArabicHard/WithTerms/350/results_with_metrics_new.csv +0 -0
Liquid350M/promptExperiments/ArabicHard/Basic/50/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.07554671968190854,
10
  "f1": 0.039874081846799574
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.29025844930417494,
13
  "domain_accuracy": 0.6747967479674797,
14
  "general_accuracy": 0.7431906614785992,
 
9
  "recall": 0.07554671968190854,
10
  "f1": 0.039874081846799574
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.29025844930417494,
20
  "domain_accuracy": 0.6747967479674797,
21
  "general_accuracy": 0.7431906614785992,
Liquid350M/promptExperiments/ArabicHard/Basic/50/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/Mixed/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.9593202883625128,
13
  "domain_accuracy": 0.0296760710553814,
14
  "general_accuracy": 0.05137055837563452,
 
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.9593202883625128,
20
  "domain_accuracy": 0.0296760710553814,
21
  "general_accuracy": 0.05137055837563452,
Liquid350M/promptExperiments/ArabicHard/Mixed/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/Mixed/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.9637482900136799,
13
  "domain_accuracy": 0.020708082832331328,
14
  "general_accuracy": 0.05255781359495445,
 
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.9637482900136799,
20
  "domain_accuracy": 0.020708082832331328,
21
  "general_accuracy": 0.05255781359495445,
Liquid350M/promptExperiments/ArabicHard/Mixed/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/Mixed/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.004081632653061225,
10
  "f1": 0.005502063273727648
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.7857142857142857,
13
  "domain_accuracy": 0.19215686274509805,
14
  "general_accuracy": 0.23829787234042554,
 
9
  "recall": 0.004081632653061225,
10
  "f1": 0.005502063273727648
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.7857142857142857,
20
  "domain_accuracy": 0.19215686274509805,
21
  "general_accuracy": 0.23829787234042554,
Liquid350M/promptExperiments/ArabicHard/Mixed/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03192584963954686,
10
  "f1": 0.02319838359649779
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.49557157569515964,
13
  "domain_accuracy": 0.44242424242424244,
14
  "general_accuracy": 0.5646700507614213,
 
9
  "recall": 0.03192584963954686,
10
  "f1": 0.02319838359649779
11
  },
12
+ "span_level": {
13
+ "precision": 0.00011753643629525152,
14
+ "recall": 0.00020597322348094748,
15
+ "f1": 0.00014966699094514705,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.00014966699094514705,
19
  "spriv": 0.49557157569515964,
20
  "domain_accuracy": 0.44242424242424244,
21
  "general_accuracy": 0.5646700507614213,
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03488372093023256,
10
  "f1": 0.025067584173015484
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.48187414500683995,
13
  "domain_accuracy": 0.45357381429525717,
14
  "general_accuracy": 0.5858444288717589,
 
9
  "recall": 0.03488372093023256,
10
  "f1": 0.025067584173015484
11
  },
12
+ "span_level": {
13
+ "precision": 0.0005753739930955121,
14
+ "recall": 0.0010259917920656635,
15
+ "f1": 0.0007372818874416318,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0007372818874416318,
19
  "spriv": 0.48187414500683995,
20
  "domain_accuracy": 0.45357381429525717,
21
  "general_accuracy": 0.5858444288717589,
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.030612244897959183,
10
  "f1": 0.01634877384196185
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.4204081632653061,
13
  "domain_accuracy": 0.5490196078431373,
14
  "general_accuracy": 0.6127659574468085,
 
9
  "recall": 0.030612244897959183,
10
  "f1": 0.01634877384196185
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.4204081632653061,
20
  "domain_accuracy": 0.5490196078431373,
21
  "general_accuracy": 0.6127659574468085,
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.009577754891864057,
10
  "f1": 0.009881002974925626
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.7713697219361483,
13
  "domain_accuracy": 0.16342737722048067,
14
  "general_accuracy": 0.29197969543147206,
 
9
  "recall": 0.009577754891864057,
10
  "f1": 0.009881002974925626
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.7713697219361483,
20
  "domain_accuracy": 0.16342737722048067,
21
  "general_accuracy": 0.29197969543147206,
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.01573187414500684,
10
  "f1": 0.013064470320931554
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.6733926128590971,
13
  "domain_accuracy": 0.24916499665998665,
14
  "general_accuracy": 0.4078486334968465,
 
9
  "recall": 0.01573187414500684,
10
  "f1": 0.013064470320931554
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.6733926128590971,
20
  "domain_accuracy": 0.24916499665998665,
21
  "general_accuracy": 0.4078486334968465,
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.036734693877551024,
10
  "f1": 0.022263450834879406
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.45510204081632655,
13
  "domain_accuracy": 0.4980392156862745,
14
  "general_accuracy": 0.5957446808510638,
 
9
  "recall": 0.036734693877551024,
10
  "f1": 0.022263450834879406
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.45510204081632655,
20
  "domain_accuracy": 0.4980392156862745,
21
  "general_accuracy": 0.5957446808510638,
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.021835826930853213,
10
  "f1": 0.02535657686212361
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.6447634452082491,
13
  "domain_accuracy": 0.2689599678133172,
14
  "general_accuracy": 0.4423897581792319,
 
9
  "recall": 0.021835826930853213,
10
  "f1": 0.02535657686212361
11
  },
12
+ "span_level": {
13
+ "precision": 0.0008397480755773268,
14
+ "recall": 0.000606550748079256,
15
+ "f1": 0.0007043493572812114,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0007043493572812114,
19
  "spriv": 0.6447634452082491,
20
  "domain_accuracy": 0.2689599678133172,
21
  "general_accuracy": 0.4423897581792319,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.04304840370751802,
10
  "f1": 0.0249247190006261
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.40061791967044286,
13
  "domain_accuracy": 0.548380355276907,
14
  "general_accuracy": 0.6489340101522842,
 
9
  "recall": 0.04304840370751802,
10
  "f1": 0.0249247190006261
11
  },
12
+ "span_level": {
13
+ "precision": 4.19621501405732e-05,
14
+ "recall": 0.00010298661174047374,
15
+ "f1": 5.962851435556483e-05,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 5.962851435556483e-05,
19
  "spriv": 0.40061791967044286,
20
  "domain_accuracy": 0.548380355276907,
21
  "general_accuracy": 0.6489340101522842,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.030154277699859747,
10
  "f1": 0.040903686087990485
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.735273492286115,
13
  "domain_accuracy": 0.21636615811373092,
14
  "general_accuracy": 0.3141843971631206,
 
9
  "recall": 0.030154277699859747,
10
  "f1": 0.040903686087990485
11
  },
12
+ "span_level": {
13
+ "precision": 0.0014781966001478197,
14
+ "recall": 0.0007012622720897616,
15
+ "f1": 0.0009512485136741973,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0009512485136741973,
19
  "spriv": 0.735273492286115,
20
  "domain_accuracy": 0.21636615811373092,
21
  "general_accuracy": 0.3141843971631206,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.04480164158686731,
10
  "f1": 0.02595344229816741
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3847469220246238,
13
  "domain_accuracy": 0.5671342685370742,
14
  "general_accuracy": 0.6657323055360898,
 
9
  "recall": 0.04480164158686731,
10
  "f1": 0.02595344229816741
11
  },
12
+ "span_level": {
13
+ "precision": 0.0002789011295495747,
14
+ "recall": 0.0006839945280437756,
15
+ "f1": 0.0003962357602773651,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0003962357602773651,
19
  "spriv": 0.3847469220246238,
20
  "domain_accuracy": 0.5671342685370742,
21
  "general_accuracy": 0.6657323055360898,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.04081632653061224,
10
  "f1": 0.021893814997263273
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.38571428571428573,
13
  "domain_accuracy": 0.596078431372549,
14
  "general_accuracy": 0.6340425531914894,
 
9
  "recall": 0.04081632653061224,
10
  "f1": 0.021893814997263273
11
  },
12
+ "span_level": {
13
+ "precision": 0.0007479431563201197,
14
+ "recall": 0.0020408163265306124,
15
+ "f1": 0.0010946907498631637,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0010946907498631637,
19
  "spriv": 0.38571428571428573,
20
  "domain_accuracy": 0.596078431372549,
21
  "general_accuracy": 0.6340425531914894,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.026879505664263646,
10
  "f1": 0.019131390874106653
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.5656024716786817,
13
  "domain_accuracy": 0.36802507836990594,
14
  "general_accuracy": 0.49888324873096446,
 
9
  "recall": 0.026879505664263646,
10
  "f1": 0.019131390874106653
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.5656024716786817,
20
  "domain_accuracy": 0.36802507836990594,
21
  "general_accuracy": 0.49888324873096446,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.030437756497948016,
10
  "f1": 0.021042676439295426
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.5068399452804377,
13
  "domain_accuracy": 0.42818971275885104,
14
  "general_accuracy": 0.5613174491941135,
 
9
  "recall": 0.030437756497948016,
10
  "f1": 0.021042676439295426
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.5068399452804377,
20
  "domain_accuracy": 0.42818971275885104,
21
  "general_accuracy": 0.5613174491941135,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03777335984095427,
10
  "f1": 0.03281519861830743
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.5407554671968191,
13
  "domain_accuracy": 0.3821138211382114,
14
  "general_accuracy": 0.5330739299610895,
 
9
  "recall": 0.03777335984095427,
10
  "f1": 0.03281519861830743
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.5407554671968191,
20
  "domain_accuracy": 0.3821138211382114,
21
  "general_accuracy": 0.5330739299610895,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.05102040816326531,
10
  "f1": 0.029308323563892145
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.3979591836734694,
13
  "domain_accuracy": 0.5764705882352941,
14
  "general_accuracy": 0.6297872340425532,
 
9
  "recall": 0.05102040816326531,
10
  "f1": 0.029308323563892145
11
  },
12
+ "span_level": {
13
+ "precision": 0.0008223684210526315,
14
+ "recall": 0.0020408163265306124,
15
+ "f1": 0.0011723329425556857,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0011723329425556857,
19
  "spriv": 0.3979591836734694,
20
  "domain_accuracy": 0.5764705882352941,
21
  "general_accuracy": 0.6297872340425532,
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.029248197734294542,
10
  "f1": 0.022639403722747024
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.588259526261586,
13
  "domain_accuracy": 0.3931034482758621,
14
  "general_accuracy": 0.42984771573604064,
 
9
  "recall": 0.029248197734294542,
10
  "f1": 0.022639403722747024
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.588259526261586,
20
  "domain_accuracy": 0.3931034482758621,
21
  "general_accuracy": 0.42984771573604064,
Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03796169630642955,
10
  "f1": 0.02683752417794971
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.533515731874145,
13
  "domain_accuracy": 0.43353373413493657,
14
  "general_accuracy": 0.5010511562718991,
 
9
  "recall": 0.03796169630642955,
10
  "f1": 0.02683752417794971
11
  },
12
+ "span_level": {
13
+ "precision": 0.00018698578908002991,
14
+ "recall": 0.0003419972640218878,
15
+ "f1": 0.00024177949709864604,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.00024177949709864604,
19
  "spriv": 0.533515731874145,
20
  "domain_accuracy": 0.43353373413493657,
21
  "general_accuracy": 0.5010511562718991,
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_test_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.03506311360448808,
10
  "f1": 0.025390377047099146
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.5536465638148668,
13
  "domain_accuracy": 0.4042995839112344,
14
  "general_accuracy": 0.48936170212765956,
 
9
  "recall": 0.03506311360448808,
10
  "f1": 0.025390377047099146
11
  },
12
+ "span_level": {
13
+ "precision": 0.0001990049751243781,
14
+ "recall": 0.0003506311360448808,
15
+ "f1": 0.00025390377047099146,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.00025390377047099146,
19
  "spriv": 0.5536465638148668,
20
  "domain_accuracy": 0.4042995839112344,
21
  "general_accuracy": 0.48936170212765956,
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_test_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/WithLabels/50/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.02040816326530612,
10
  "f1": 0.012383900928792569
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.45510204081632655,
13
  "domain_accuracy": 0.5176470588235295,
14
  "general_accuracy": 0.574468085106383,
 
9
  "recall": 0.02040816326530612,
10
  "f1": 0.012383900928792569
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.45510204081632655,
20
  "domain_accuracy": 0.5176470588235295,
21
  "general_accuracy": 0.574468085106383,
Liquid350M/promptExperiments/ArabicHard/WithLabels/50/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.9578784757981462,
13
  "domain_accuracy": 0.026541274817136886,
14
  "general_accuracy": 0.05725888324873096,
 
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.9578784757981462,
20
  "domain_accuracy": 0.026541274817136886,
21
  "general_accuracy": 0.05725888324873096,
Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Liquid350M/promptExperiments/ArabicHard/WithTerms/350/metrics_new.json CHANGED
@@ -9,6 +9,13 @@
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
 
 
 
 
 
 
 
12
  "spriv": 0.9681942544459644,
13
  "domain_accuracy": 0.014028056112224449,
14
  "general_accuracy": 0.050455501051156273,
 
9
  "recall": 0.0,
10
  "f1": 0.0
11
  },
12
+ "span_level": {
13
+ "precision": 0.0,
14
+ "recall": 0.0,
15
+ "f1": 0.0,
16
+ "iou_threshold": 0.5
17
+ },
18
+ "f1_s": 0.0,
19
  "spriv": 0.9681942544459644,
20
  "domain_accuracy": 0.014028056112224449,
21
  "general_accuracy": 0.050455501051156273,
Liquid350M/promptExperiments/ArabicHard/WithTerms/350/results_with_metrics_new.csv CHANGED
The diff for this file is too large to render. See raw diff