Recompute metrics batch 4
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Liquid350M/promptExperiments/ArabicHard/Basic/50/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/Basic/50/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/Mixed/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/Mixed/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/Mixed/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/Mixed/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/Mixed/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/Mixed/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/WithLabels/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/ArabicHard/WithTerms/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/ArabicHard/WithTerms/350/results_with_metrics_new.csv +0 -0
Liquid350M/promptExperiments/ArabicHard/Basic/50/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.07554671968190854,
|
| 10 |
"f1": 0.039874081846799574
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.29025844930417494,
|
| 13 |
"domain_accuracy": 0.6747967479674797,
|
| 14 |
"general_accuracy": 0.7431906614785992,
|
|
|
|
| 9 |
"recall": 0.07554671968190854,
|
| 10 |
"f1": 0.039874081846799574
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.29025844930417494,
|
| 20 |
"domain_accuracy": 0.6747967479674797,
|
| 21 |
"general_accuracy": 0.7431906614785992,
|
Liquid350M/promptExperiments/ArabicHard/Basic/50/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/Mixed/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.9593202883625128,
|
| 13 |
"domain_accuracy": 0.0296760710553814,
|
| 14 |
"general_accuracy": 0.05137055837563452,
|
|
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.9593202883625128,
|
| 20 |
"domain_accuracy": 0.0296760710553814,
|
| 21 |
"general_accuracy": 0.05137055837563452,
|
Liquid350M/promptExperiments/ArabicHard/Mixed/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/Mixed/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.9637482900136799,
|
| 13 |
"domain_accuracy": 0.020708082832331328,
|
| 14 |
"general_accuracy": 0.05255781359495445,
|
|
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.9637482900136799,
|
| 20 |
"domain_accuracy": 0.020708082832331328,
|
| 21 |
"general_accuracy": 0.05255781359495445,
|
Liquid350M/promptExperiments/ArabicHard/Mixed/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/Mixed/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.004081632653061225,
|
| 10 |
"f1": 0.005502063273727648
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.7857142857142857,
|
| 13 |
"domain_accuracy": 0.19215686274509805,
|
| 14 |
"general_accuracy": 0.23829787234042554,
|
|
|
|
| 9 |
"recall": 0.004081632653061225,
|
| 10 |
"f1": 0.005502063273727648
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.7857142857142857,
|
| 20 |
"domain_accuracy": 0.19215686274509805,
|
| 21 |
"general_accuracy": 0.23829787234042554,
|
Liquid350M/promptExperiments/ArabicHard/Mixed/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03192584963954686,
|
| 10 |
"f1": 0.02319838359649779
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.49557157569515964,
|
| 13 |
"domain_accuracy": 0.44242424242424244,
|
| 14 |
"general_accuracy": 0.5646700507614213,
|
|
|
|
| 9 |
"recall": 0.03192584963954686,
|
| 10 |
"f1": 0.02319838359649779
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.00011753643629525152,
|
| 14 |
+
"recall": 0.00020597322348094748,
|
| 15 |
+
"f1": 0.00014966699094514705,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.00014966699094514705,
|
| 19 |
"spriv": 0.49557157569515964,
|
| 20 |
"domain_accuracy": 0.44242424242424244,
|
| 21 |
"general_accuracy": 0.5646700507614213,
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03488372093023256,
|
| 10 |
"f1": 0.025067584173015484
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.48187414500683995,
|
| 13 |
"domain_accuracy": 0.45357381429525717,
|
| 14 |
"general_accuracy": 0.5858444288717589,
|
|
|
|
| 9 |
"recall": 0.03488372093023256,
|
| 10 |
"f1": 0.025067584173015484
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0005753739930955121,
|
| 14 |
+
"recall": 0.0010259917920656635,
|
| 15 |
+
"f1": 0.0007372818874416318,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0007372818874416318,
|
| 19 |
"spriv": 0.48187414500683995,
|
| 20 |
"domain_accuracy": 0.45357381429525717,
|
| 21 |
"general_accuracy": 0.5858444288717589,
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.030612244897959183,
|
| 10 |
"f1": 0.01634877384196185
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.4204081632653061,
|
| 13 |
"domain_accuracy": 0.5490196078431373,
|
| 14 |
"general_accuracy": 0.6127659574468085,
|
|
|
|
| 9 |
"recall": 0.030612244897959183,
|
| 10 |
"f1": 0.01634877384196185
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.4204081632653061,
|
| 20 |
"domain_accuracy": 0.5490196078431373,
|
| 21 |
"general_accuracy": 0.6127659574468085,
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top1/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.009577754891864057,
|
| 10 |
"f1": 0.009881002974925626
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.7713697219361483,
|
| 13 |
"domain_accuracy": 0.16342737722048067,
|
| 14 |
"general_accuracy": 0.29197969543147206,
|
|
|
|
| 9 |
"recall": 0.009577754891864057,
|
| 10 |
"f1": 0.009881002974925626
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.7713697219361483,
|
| 20 |
"domain_accuracy": 0.16342737722048067,
|
| 21 |
"general_accuracy": 0.29197969543147206,
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.01573187414500684,
|
| 10 |
"f1": 0.013064470320931554
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.6733926128590971,
|
| 13 |
"domain_accuracy": 0.24916499665998665,
|
| 14 |
"general_accuracy": 0.4078486334968465,
|
|
|
|
| 9 |
"recall": 0.01573187414500684,
|
| 10 |
"f1": 0.013064470320931554
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.6733926128590971,
|
| 20 |
"domain_accuracy": 0.24916499665998665,
|
| 21 |
"general_accuracy": 0.4078486334968465,
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.036734693877551024,
|
| 10 |
"f1": 0.022263450834879406
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.45510204081632655,
|
| 13 |
"domain_accuracy": 0.4980392156862745,
|
| 14 |
"general_accuracy": 0.5957446808510638,
|
|
|
|
| 9 |
"recall": 0.036734693877551024,
|
| 10 |
"f1": 0.022263450834879406
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.45510204081632655,
|
| 20 |
"domain_accuracy": 0.4980392156862745,
|
| 21 |
"general_accuracy": 0.5957446808510638,
|
Liquid350M/promptExperiments/ArabicHard/RAG_3word_Top3/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.021835826930853213,
|
| 10 |
"f1": 0.02535657686212361
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.6447634452082491,
|
| 13 |
"domain_accuracy": 0.2689599678133172,
|
| 14 |
"general_accuracy": 0.4423897581792319,
|
|
|
|
| 9 |
"recall": 0.021835826930853213,
|
| 10 |
"f1": 0.02535657686212361
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0008397480755773268,
|
| 14 |
+
"recall": 0.000606550748079256,
|
| 15 |
+
"f1": 0.0007043493572812114,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0007043493572812114,
|
| 19 |
"spriv": 0.6447634452082491,
|
| 20 |
"domain_accuracy": 0.2689599678133172,
|
| 21 |
"general_accuracy": 0.4423897581792319,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/hfMetrics/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.04304840370751802,
|
| 10 |
"f1": 0.0249247190006261
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.40061791967044286,
|
| 13 |
"domain_accuracy": 0.548380355276907,
|
| 14 |
"general_accuracy": 0.6489340101522842,
|
|
|
|
| 9 |
"recall": 0.04304840370751802,
|
| 10 |
"f1": 0.0249247190006261
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 4.19621501405732e-05,
|
| 14 |
+
"recall": 0.00010298661174047374,
|
| 15 |
+
"f1": 5.962851435556483e-05,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 5.962851435556483e-05,
|
| 19 |
"spriv": 0.40061791967044286,
|
| 20 |
"domain_accuracy": 0.548380355276907,
|
| 21 |
"general_accuracy": 0.6489340101522842,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.030154277699859747,
|
| 10 |
"f1": 0.040903686087990485
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.735273492286115,
|
| 13 |
"domain_accuracy": 0.21636615811373092,
|
| 14 |
"general_accuracy": 0.3141843971631206,
|
|
|
|
| 9 |
"recall": 0.030154277699859747,
|
| 10 |
"f1": 0.040903686087990485
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0014781966001478197,
|
| 14 |
+
"recall": 0.0007012622720897616,
|
| 15 |
+
"f1": 0.0009512485136741973,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0009512485136741973,
|
| 19 |
"spriv": 0.735273492286115,
|
| 20 |
"domain_accuracy": 0.21636615811373092,
|
| 21 |
"general_accuracy": 0.3141843971631206,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/hfMetrics/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.04480164158686731,
|
| 10 |
"f1": 0.02595344229816741
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.3847469220246238,
|
| 13 |
"domain_accuracy": 0.5671342685370742,
|
| 14 |
"general_accuracy": 0.6657323055360898,
|
|
|
|
| 9 |
"recall": 0.04480164158686731,
|
| 10 |
"f1": 0.02595344229816741
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0002789011295495747,
|
| 14 |
+
"recall": 0.0006839945280437756,
|
| 15 |
+
"f1": 0.0003962357602773651,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0003962357602773651,
|
| 19 |
"spriv": 0.3847469220246238,
|
| 20 |
"domain_accuracy": 0.5671342685370742,
|
| 21 |
"general_accuracy": 0.6657323055360898,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.04081632653061224,
|
| 10 |
"f1": 0.021893814997263273
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.38571428571428573,
|
| 13 |
"domain_accuracy": 0.596078431372549,
|
| 14 |
"general_accuracy": 0.6340425531914894,
|
|
|
|
| 9 |
"recall": 0.04081632653061224,
|
| 10 |
"f1": 0.021893814997263273
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0007479431563201197,
|
| 14 |
+
"recall": 0.0020408163265306124,
|
| 15 |
+
"f1": 0.0010946907498631637,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0010946907498631637,
|
| 19 |
"spriv": 0.38571428571428573,
|
| 20 |
"domain_accuracy": 0.596078431372549,
|
| 21 |
"general_accuracy": 0.6340425531914894,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top1/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.026879505664263646,
|
| 10 |
"f1": 0.019131390874106653
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.5656024716786817,
|
| 13 |
"domain_accuracy": 0.36802507836990594,
|
| 14 |
"general_accuracy": 0.49888324873096446,
|
|
|
|
| 9 |
"recall": 0.026879505664263646,
|
| 10 |
"f1": 0.019131390874106653
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.5656024716786817,
|
| 20 |
"domain_accuracy": 0.36802507836990594,
|
| 21 |
"general_accuracy": 0.49888324873096446,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.030437756497948016,
|
| 10 |
"f1": 0.021042676439295426
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.5068399452804377,
|
| 13 |
"domain_accuracy": 0.42818971275885104,
|
| 14 |
"general_accuracy": 0.5613174491941135,
|
|
|
|
| 9 |
"recall": 0.030437756497948016,
|
| 10 |
"f1": 0.021042676439295426
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.5068399452804377,
|
| 20 |
"domain_accuracy": 0.42818971275885104,
|
| 21 |
"general_accuracy": 0.5613174491941135,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03777335984095427,
|
| 10 |
"f1": 0.03281519861830743
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.5407554671968191,
|
| 13 |
"domain_accuracy": 0.3821138211382114,
|
| 14 |
"general_accuracy": 0.5330739299610895,
|
|
|
|
| 9 |
"recall": 0.03777335984095427,
|
| 10 |
"f1": 0.03281519861830743
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.5407554671968191,
|
| 20 |
"domain_accuracy": 0.3821138211382114,
|
| 21 |
"general_accuracy": 0.5330739299610895,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/hfMetrics/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.05102040816326531,
|
| 10 |
"f1": 0.029308323563892145
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.3979591836734694,
|
| 13 |
"domain_accuracy": 0.5764705882352941,
|
| 14 |
"general_accuracy": 0.6297872340425532,
|
|
|
|
| 9 |
"recall": 0.05102040816326531,
|
| 10 |
"f1": 0.029308323563892145
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0008223684210526315,
|
| 14 |
+
"recall": 0.0020408163265306124,
|
| 15 |
+
"f1": 0.0011723329425556857,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0011723329425556857,
|
| 19 |
"spriv": 0.3979591836734694,
|
| 20 |
"domain_accuracy": 0.5764705882352941,
|
| 21 |
"general_accuracy": 0.6297872340425532,
|
Liquid350M/promptExperiments/ArabicHard/RAG_6word_Top3/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.029248197734294542,
|
| 10 |
"f1": 0.022639403722747024
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.588259526261586,
|
| 13 |
"domain_accuracy": 0.3931034482758621,
|
| 14 |
"general_accuracy": 0.42984771573604064,
|
|
|
|
| 9 |
"recall": 0.029248197734294542,
|
| 10 |
"f1": 0.022639403722747024
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.588259526261586,
|
| 20 |
"domain_accuracy": 0.3931034482758621,
|
| 21 |
"general_accuracy": 0.42984771573604064,
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03796169630642955,
|
| 10 |
"f1": 0.02683752417794971
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.533515731874145,
|
| 13 |
"domain_accuracy": 0.43353373413493657,
|
| 14 |
"general_accuracy": 0.5010511562718991,
|
|
|
|
| 9 |
"recall": 0.03796169630642955,
|
| 10 |
"f1": 0.02683752417794971
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.00018698578908002991,
|
| 14 |
+
"recall": 0.0003419972640218878,
|
| 15 |
+
"f1": 0.00024177949709864604,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.00024177949709864604,
|
| 19 |
"spriv": 0.533515731874145,
|
| 20 |
"domain_accuracy": 0.43353373413493657,
|
| 21 |
"general_accuracy": 0.5010511562718991,
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03506311360448808,
|
| 10 |
"f1": 0.025390377047099146
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.5536465638148668,
|
| 13 |
"domain_accuracy": 0.4042995839112344,
|
| 14 |
"general_accuracy": 0.48936170212765956,
|
|
|
|
| 9 |
"recall": 0.03506311360448808,
|
| 10 |
"f1": 0.025390377047099146
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0001990049751243781,
|
| 14 |
+
"recall": 0.0003506311360448808,
|
| 15 |
+
"f1": 0.00025390377047099146,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.00025390377047099146,
|
| 19 |
"spriv": 0.5536465638148668,
|
| 20 |
"domain_accuracy": 0.4042995839112344,
|
| 21 |
"general_accuracy": 0.48936170212765956,
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.02040816326530612,
|
| 10 |
"f1": 0.012383900928792569
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.45510204081632655,
|
| 13 |
"domain_accuracy": 0.5176470588235295,
|
| 14 |
"general_accuracy": 0.574468085106383,
|
|
|
|
| 9 |
"recall": 0.02040816326530612,
|
| 10 |
"f1": 0.012383900928792569
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.45510204081632655,
|
| 20 |
"domain_accuracy": 0.5176470588235295,
|
| 21 |
"general_accuracy": 0.574468085106383,
|
Liquid350M/promptExperiments/ArabicHard/WithLabels/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.9578784757981462,
|
| 13 |
"domain_accuracy": 0.026541274817136886,
|
| 14 |
"general_accuracy": 0.05725888324873096,
|
|
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.9578784757981462,
|
| 20 |
"domain_accuracy": 0.026541274817136886,
|
| 21 |
"general_accuracy": 0.05725888324873096,
|
Liquid350M/promptExperiments/ArabicHard/WithTerms/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/ArabicHard/WithTerms/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.9681942544459644,
|
| 13 |
"domain_accuracy": 0.014028056112224449,
|
| 14 |
"general_accuracy": 0.050455501051156273,
|
|
|
|
| 9 |
"recall": 0.0,
|
| 10 |
"f1": 0.0
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.9681942544459644,
|
| 20 |
"domain_accuracy": 0.014028056112224449,
|
| 21 |
"general_accuracy": 0.050455501051156273,
|
Liquid350M/promptExperiments/ArabicHard/WithTerms/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|