Recompute metrics batch 5
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithTerms/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithTerms/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/EnglishHard/WithTerms/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/EnglishHard/WithTerms/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/Basic/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/Basic/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/Basic/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/Basic/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/Basic/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/Basic/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/Mixed/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/Mixed/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/Mixed/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/Mixed/350/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/Mixed/50/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/Mixed/50/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/results_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/metrics_test_new.json +7 -0
- Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/results_test_with_metrics_new.csv +0 -0
- Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/metrics_new.json +7 -0
- Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/results_with_metrics_new.csv +0 -0
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0943089430894309,
|
| 10 |
"f1": 0.047366271947733775
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.37886178861788616,
|
| 13 |
"domain_accuracy": 0.6857142857142857,
|
| 14 |
"general_accuracy": 0.5783783783783784,
|
|
|
|
| 9 |
"recall": 0.0943089430894309,
|
| 10 |
"f1": 0.047366271947733775
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0010905125408942203,
|
| 14 |
+
"recall": 0.0032520325203252032,
|
| 15 |
+
"f1": 0.0016333197223356473,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0016333197223356473,
|
| 19 |
"spriv": 0.37886178861788616,
|
| 20 |
"domain_accuracy": 0.6857142857142857,
|
| 21 |
"general_accuracy": 0.5783783783783784,
|
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/hfMetrics/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.08660130718954248,
|
| 10 |
"f1": 0.03996983408748115
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.12581699346405228,
|
| 13 |
"domain_accuracy": 0.8844621513944223,
|
| 14 |
"general_accuracy": 0.8670360110803325,
|
|
|
|
| 9 |
"recall": 0.08660130718954248,
|
| 10 |
"f1": 0.03996983408748115
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0029411764705882353,
|
| 14 |
+
"recall": 0.00980392156862745,
|
| 15 |
+
"f1": 0.004524886877828055,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.004524886877828055,
|
| 19 |
"spriv": 0.12581699346405228,
|
| 20 |
"domain_accuracy": 0.8844621513944223,
|
| 21 |
"general_accuracy": 0.8670360110803325,
|
Liquid350M/promptExperiments/EnglishHard/RAG_3word_Top3/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.06006847183317771,
|
| 10 |
"f1": 0.025307326667759387
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.06255835667600373,
|
| 13 |
"domain_accuracy": 0.9220389805097451,
|
| 14 |
"general_accuracy": 0.9462299890015886,
|
|
|
|
| 9 |
"recall": 0.06006847183317771,
|
| 10 |
"f1": 0.025307326667759387
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.000809834295444163,
|
| 14 |
+
"recall": 0.003034547152194211,
|
| 15 |
+
"f1": 0.0012784789378790361,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0012784789378790361,
|
| 19 |
"spriv": 0.06255835667600373,
|
| 20 |
"domain_accuracy": 0.9220389805097451,
|
| 21 |
"general_accuracy": 0.9462299890015886,
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03402061855670103,
|
| 10 |
"f1": 0.014548126377663482
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.19415807560137457,
|
| 13 |
"domain_accuracy": 0.7739251040221914,
|
| 14 |
"general_accuracy": 0.8371934604904632,
|
|
|
|
| 9 |
"recall": 0.03402061855670103,
|
| 10 |
"f1": 0.014548126377663482
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0007476635514018691,
|
| 14 |
+
"recall": 0.0027491408934707906,
|
| 15 |
+
"f1": 0.0011756061719324026,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0011756061719324026,
|
| 19 |
"spriv": 0.19415807560137457,
|
| 20 |
"domain_accuracy": 0.7739251040221914,
|
| 21 |
"general_accuracy": 0.8371934604904632,
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.07026143790849673,
|
| 10 |
"f1": 0.03517382413087935
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.21568627450980393,
|
| 13 |
"domain_accuracy": 0.8047808764940239,
|
| 14 |
"general_accuracy": 0.7700831024930748,
|
|
|
|
| 9 |
"recall": 0.07026143790849673,
|
| 10 |
"f1": 0.03517382413087935
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0016366612111292963,
|
| 14 |
+
"recall": 0.004901960784313725,
|
| 15 |
+
"f1": 0.00245398773006135,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.00245398773006135,
|
| 19 |
"spriv": 0.21568627450980393,
|
| 20 |
"domain_accuracy": 0.8047808764940239,
|
| 21 |
"general_accuracy": 0.7700831024930748,
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top1/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.05913476501711796,
|
| 10 |
"f1": 0.0248276763255039
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.0647370059134765,
|
| 13 |
"domain_accuracy": 0.9126151210109231,
|
| 14 |
"general_accuracy": 0.9481852621288036,
|
|
|
|
| 9 |
"recall": 0.05913476501711796,
|
| 10 |
"f1": 0.0248276763255039
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0005995451726276618,
|
| 14 |
+
"recall": 0.00225645813881108,
|
| 15 |
+
"f1": 0.0009473718597889648,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0009473718597889648,
|
| 19 |
"spriv": 0.0647370059134765,
|
| 20 |
"domain_accuracy": 0.9126151210109231,
|
| 21 |
"general_accuracy": 0.9481852621288036,
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.036426116838487975,
|
| 10 |
"f1": 0.014436499829758257
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.1508591065292096,
|
| 13 |
"domain_accuracy": 0.8259361997226075,
|
| 14 |
"general_accuracy": 0.8719346049046321,
|
|
|
|
| 9 |
"recall": 0.036426116838487975,
|
| 10 |
"f1": 0.014436499829758257
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0007643312101910828,
|
| 14 |
+
"recall": 0.003092783505154639,
|
| 15 |
+
"f1": 0.0012257405515832482,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0012257405515832482,
|
| 19 |
"spriv": 0.1508591065292096,
|
| 20 |
"domain_accuracy": 0.8259361997226075,
|
| 21 |
"general_accuracy": 0.8719346049046321,
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0718954248366013,
|
| 10 |
"f1": 0.03471400394477318
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.16666666666666666,
|
| 13 |
"domain_accuracy": 0.848605577689243,
|
| 14 |
"general_accuracy": 0.8227146814404432,
|
|
|
|
| 9 |
"recall": 0.0718954248366013,
|
| 10 |
"f1": 0.03471400394477318
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0010400416016640667,
|
| 14 |
+
"recall": 0.0032679738562091504,
|
| 15 |
+
"f1": 0.0015779092702169627,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0015779092702169627,
|
| 19 |
"spriv": 0.16666666666666666,
|
| 20 |
"domain_accuracy": 0.848605577689243,
|
| 21 |
"general_accuracy": 0.8227146814404432,
|
Liquid350M/promptExperiments/EnglishHard/RAG_6word_Top3/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.08823529411764706,
|
| 10 |
"f1": 0.04761005101076894
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.03649237472766884,
|
| 13 |
"domain_accuracy": 0.9601627757549797,
|
| 14 |
"general_accuracy": 0.9654161065623854,
|
|
|
|
| 9 |
"recall": 0.08823529411764706,
|
| 10 |
"f1": 0.04761005101076894
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 2.874802357337933e-05,
|
| 14 |
+
"recall": 7.78089013383131e-05,
|
| 15 |
+
"f1": 4.198417196716838e-05,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 4.198417196716838e-05,
|
| 19 |
"spriv": 0.03649237472766884,
|
| 20 |
"domain_accuracy": 0.9601627757549797,
|
| 21 |
"general_accuracy": 0.9654161065623854,
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.07987023018693033,
|
| 10 |
"f1": 0.0434618132907402
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.0326741850764715,
|
| 13 |
"domain_accuracy": 0.9641068447412354,
|
| 14 |
"general_accuracy": 0.9692175619327937,
|
|
|
|
| 9 |
"recall": 0.07987023018693033,
|
| 10 |
"f1": 0.0434618132907402
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.0326741850764715,
|
| 20 |
"domain_accuracy": 0.9641068447412354,
|
| 21 |
"general_accuracy": 0.9692175619327937,
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.031958762886597936,
|
| 10 |
"f1": 0.014476961394769616
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.07628865979381444,
|
| 13 |
"domain_accuracy": 0.926490984743412,
|
| 14 |
"general_accuracy": 0.9209809264305178,
|
|
|
|
| 9 |
"recall": 0.031958762886597936,
|
| 10 |
"f1": 0.014476961394769616
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.07628865979381444,
|
| 20 |
"domain_accuracy": 0.926490984743412,
|
| 21 |
"general_accuracy": 0.9209809264305178,
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.07352941176470588,
|
| 10 |
"f1": 0.03802281368821293
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.03104575163398693,
|
| 13 |
"domain_accuracy": 0.9721115537848606,
|
| 14 |
"general_accuracy": 0.9667590027700831,
|
|
|
|
| 9 |
"recall": 0.07352941176470588,
|
| 10 |
"f1": 0.03802281368821293
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.03104575163398693,
|
| 20 |
"domain_accuracy": 0.9721115537848606,
|
| 21 |
"general_accuracy": 0.9667590027700831,
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.07642276422764227,
|
| 10 |
"f1": 0.03924843423799583
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.013008130081300813,
|
| 13 |
"domain_accuracy": 0.9918367346938776,
|
| 14 |
"general_accuracy": 0.9837837837837838,
|
|
|
|
| 9 |
"recall": 0.07642276422764227,
|
| 10 |
"f1": 0.03924843423799583
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.013008130081300813,
|
| 20 |
"domain_accuracy": 0.9918367346938776,
|
| 21 |
"general_accuracy": 0.9837837837837838,
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithLabels/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.03649237472766884,
|
| 10 |
"f1": 0.05288082083662194
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.7595704948646125,
|
| 13 |
"domain_accuracy": 0.12893553223388307,
|
| 14 |
"general_accuracy": 0.3040449712819259,
|
|
|
|
| 9 |
"recall": 0.03649237472766884,
|
| 10 |
"f1": 0.05288082083662194
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0014326647564469914,
|
| 14 |
+
"recall": 0.0005446623093681918,
|
| 15 |
+
"f1": 0.0007892659826361484,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0007892659826361484,
|
| 19 |
"spriv": 0.7595704948646125,
|
| 20 |
"domain_accuracy": 0.12893553223388307,
|
| 21 |
"general_accuracy": 0.3040449712819259,
|
Liquid350M/promptExperiments/EnglishHard/WithTerms/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithTerms/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.02302405498281787,
|
| 10 |
"f1": 0.02667728449133984
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.7109965635738832,
|
| 13 |
"domain_accuracy": 0.2447988904299584,
|
| 14 |
"general_accuracy": 0.33242506811989103,
|
|
|
|
| 9 |
"recall": 0.02302405498281787,
|
| 10 |
"f1": 0.02667728449133984
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.7109965635738832,
|
| 20 |
"domain_accuracy": 0.2447988904299584,
|
| 21 |
"general_accuracy": 0.33242506811989103,
|
Liquid350M/promptExperiments/EnglishHard/WithTerms/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/EnglishHard/WithTerms/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.08333333333333333,
|
| 10 |
"f1": 0.04283914321713566
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.049019607843137254,
|
| 13 |
"domain_accuracy": 0.952191235059761,
|
| 14 |
"general_accuracy": 0.9501385041551247,
|
|
|
|
| 9 |
"recall": 0.08333333333333333,
|
| 10 |
"f1": 0.04283914321713566
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.049019607843137254,
|
| 20 |
"domain_accuracy": 0.952191235059761,
|
| 21 |
"general_accuracy": 0.9501385041551247,
|
Liquid350M/promptExperiments/EnglishHard/WithTerms/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/Basic/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.07058545797922569,
|
| 10 |
"f1": 0.04902309058614565
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.3523764557758892,
|
| 13 |
"domain_accuracy": 0.6231371946535567,
|
| 14 |
"general_accuracy": 0.673334408775609,
|
|
|
|
| 9 |
"recall": 0.07058545797922569,
|
| 10 |
"f1": 0.04902309058614565
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.3523764557758892,
|
| 20 |
"domain_accuracy": 0.6231371946535567,
|
| 21 |
"general_accuracy": 0.673334408775609,
|
Liquid350M/promptExperiments/Mix/Basic/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/Basic/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0332522303325223,
|
| 10 |
"f1": 0.02295204329165889
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.38686131386861317,
|
| 13 |
"domain_accuracy": 0.6234225138818779,
|
| 14 |
"general_accuracy": 0.6012805587892899,
|
|
|
|
| 9 |
"recall": 0.0332522303325223,
|
| 10 |
"f1": 0.02295204329165889
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.38686131386861317,
|
| 20 |
"domain_accuracy": 0.6234225138818779,
|
| 21 |
"general_accuracy": 0.6012805587892899,
|
Liquid350M/promptExperiments/Mix/Basic/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/Basic/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.06891025641025642,
|
| 10 |
"f1": 0.037423846823324634
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.16185897435897437,
|
| 13 |
"domain_accuracy": 0.8200589970501475,
|
| 14 |
"general_accuracy": 0.8596491228070176,
|
|
|
|
| 9 |
"recall": 0.06891025641025642,
|
| 10 |
"f1": 0.037423846823324634
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.16185897435897437,
|
| 20 |
"domain_accuracy": 0.8200589970501475,
|
| 21 |
"general_accuracy": 0.8596491228070176,
|
Liquid350M/promptExperiments/Mix/Basic/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/Mixed/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.00039345294302801383,
|
| 10 |
"f1": 0.0007320108337603396
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.9390934844192634,
|
| 13 |
"domain_accuracy": 0.04685819634352435,
|
| 14 |
"general_accuracy": 0.07565736409098242,
|
|
|
|
| 9 |
"recall": 0.00039345294302801383,
|
| 10 |
"f1": 0.0007320108337603396
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.9390934844192634,
|
| 20 |
"domain_accuracy": 0.04685819634352435,
|
| 21 |
"general_accuracy": 0.07565736409098242,
|
Liquid350M/promptExperiments/Mix/Mixed/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/Mixed/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.0032441200324412004,
|
| 10 |
"f1": 0.005934718100890209
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.943227899432279,
|
| 13 |
"domain_accuracy": 0.057041898031297326,
|
| 14 |
"general_accuracy": 0.056461001164144355,
|
|
|
|
| 9 |
"recall": 0.0032441200324412004,
|
| 10 |
"f1": 0.005934718100890209
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.943227899432279,
|
| 20 |
"domain_accuracy": 0.057041898031297326,
|
| 21 |
"general_accuracy": 0.056461001164144355,
|
Liquid350M/promptExperiments/Mix/Mixed/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/Mixed/50/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.01282051282051282,
|
| 10 |
"f1": 0.013547840812870448
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.5641025641025641,
|
| 13 |
"domain_accuracy": 0.471976401179941,
|
| 14 |
"general_accuracy": 0.3929824561403509,
|
|
|
|
| 9 |
"recall": 0.01282051282051282,
|
| 10 |
"f1": 0.013547840812870448
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0,
|
| 14 |
+
"recall": 0.0,
|
| 15 |
+
"f1": 0.0,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0,
|
| 19 |
"spriv": 0.5641025641025641,
|
| 20 |
"domain_accuracy": 0.471976401179941,
|
| 21 |
"general_accuracy": 0.3929824561403509,
|
Liquid350M/promptExperiments/Mix/Mixed/50/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.019751337740006297,
|
| 10 |
"f1": 0.012231670768256134
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.3051621026125275,
|
| 13 |
"domain_accuracy": 0.6500230450145952,
|
| 14 |
"general_accuracy": 0.7418938538473947,
|
|
|
|
| 9 |
"recall": 0.019751337740006297,
|
| 10 |
"f1": 0.012231670768256134
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.0007058906575371475,
|
| 14 |
+
"recall": 0.0015738117721120553,
|
| 15 |
+
"f1": 0.0009746351209765845,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.0009746351209765845,
|
| 19 |
"spriv": 0.3051621026125275,
|
| 20 |
"domain_accuracy": 0.6500230450145952,
|
| 21 |
"general_accuracy": 0.7418938538473947,
|
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/1000/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/metrics_test_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.032117583015786606,
|
| 10 |
"f1": 0.02987719964552475
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.5982580293957539,
|
| 13 |
"domain_accuracy": 0.3745492014425554,
|
| 14 |
"general_accuracy": 0.432198499711483,
|
|
|
|
| 9 |
"recall": 0.032117583015786606,
|
| 10 |
"f1": 0.02987719964552475
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.001893491124260355,
|
| 14 |
+
"recall": 0.002177463255307567,
|
| 15 |
+
"f1": 0.002025572857323712,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.002025572857323712,
|
| 19 |
"spriv": 0.5982580293957539,
|
| 20 |
"domain_accuracy": 0.3745492014425554,
|
| 21 |
"general_accuracy": 0.432198499711483,
|
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/hfMetrics/results_test_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/metrics_new.json
CHANGED
|
@@ -9,6 +9,13 @@
|
|
| 9 |
"recall": 0.01567991349013247,
|
| 10 |
"f1": 0.01033223479112853
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"spriv": 0.3560421735604217,
|
| 13 |
"domain_accuracy": 0.6431095406360424,
|
| 14 |
"general_accuracy": 0.6449359720605355,
|
|
|
|
| 9 |
"recall": 0.01567991349013247,
|
| 10 |
"f1": 0.01033223479112853
|
| 11 |
},
|
| 12 |
+
"span_level": {
|
| 13 |
+
"precision": 0.00026567481402763017,
|
| 14 |
+
"recall": 0.0005406866720735334,
|
| 15 |
+
"f1": 0.00035628395831477686,
|
| 16 |
+
"iou_threshold": 0.5
|
| 17 |
+
},
|
| 18 |
+
"f1_s": 0.00035628395831477686,
|
| 19 |
"spriv": 0.3560421735604217,
|
| 20 |
"domain_accuracy": 0.6431095406360424,
|
| 21 |
"general_accuracy": 0.6449359720605355,
|
Liquid350M/promptExperiments/Mix/RAG_3word_Top1/350/results_with_metrics_new.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|