diff --git a/csv_files/llm_scores_p3.xlsx b/csv_files/llm_scores_p3.xlsx index bfc0bdc185caa4e7f0530b277867503d7df485fc..3621768a991e0d757b6e5af462af6dc618f9f293 100644 Binary files a/csv_files/llm_scores_p3.xlsx and b/csv_files/llm_scores_p3.xlsx differ diff --git a/csv_files/outputs/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt b/csv_files/outputs/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt deleted file mode 100644 index 37a0a0b3fcec4413d915fb8b17302fe6c93286c4..0000000000000000000000000000000000000000 --- a/csv_files/outputs/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt +++ /dev/null @@ -1,11 +0,0 @@ -hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 -|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| -|-------|-------|------|------|------|----|------|---|------| -| - NER | | | |f1 | | 0.2877 | |0 | -| - p1 | | | |f1 | | 0.1963 | | 0 | -| - p2 | | | |f1 | | 0.3459 | | 0 | -| - p3 | | | |f1 | | 0.3208 | | 0 | -| - RE | | | |f1 | | 0.4430 | |0 | -| - p1 | | | |f1 | | 0.4487 | | 0 | -| - p2 | | | |f1 | | 0.4492 | | 0 | -| - p3 | | | |f1 | | 0.4311 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt index 61582a2a92516a6c5547648b28d3f1e909d0067d..970790d8c37ee624a941d7838baf752c1418f32c 100644 --- a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.2189 | | 0 | | - p2 | | | |f1 | | 0.2243 | | 0 | | - p3 | | | |f1 | | 0.1994 | | 0 | -| - RE | | | |f1 | | 0.1429 | |0 | +| - RE | | | |f1 | | 0.1681 | |0 | | - p1 | | | |f1 | | 0.1189 | | 0 | | - p2 | | | |f1 | | 0.1668 | | 0 | +| - p3 | | | |f1 | | 0.2185 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt index 29ae4e0eed887a125d86ca5fca2d2fd3146689cf..e1968c70df7ae59de71b96c9719693f1041cc591 100644 --- a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1667 | | 0 | | - p2 | | | |f1 | | 0.1089 | | 0 | | - p3 | | | |f1 | | 0.1667 | | 0 | -| - RE | | | |f1 | | 0.0937 | |0 | +| - RE | | | |f1 | | 0.0970 | |0 | | - p1 | | | |f1 | | 0.0821 | | 0 | | - p2 | | | |f1 | | 0.1053 | | 0 | +| - p3 | | | |f1 | | 0.1036 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt index ae779703662ed0449b631e8a70a4b116bcc8b8d1..1324843bcf3efaa493662c82d523957a6e202c45 100644 --- a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3299 | | 0 | | - p2 | | | |f1 | | 0.4023 | | 0 | | - p3 | | | |f1 | | 0.3938 | | 0 | -| - RE | | | |f1 | | 0.1102 | |0 | +| - RE | | | |f1 | | 0.1331 | |0 | | - p1 | | | |f1 | | 0.0977 | | 0 | | - p2 | | | |f1 | | 0.1226 | | 0 | +| - p3 | | | |f1 | | 0.1789 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt index acd4a7476db8ef3ad2bf489c194693722148311b..9dee6185f81350fdc85c72cb4a61be93b071cc61 100644 --- a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3992 | | 0 | | - p2 | | | |f1 | | 0.3916 | | 0 | | - p3 | | | |f1 | | 0.3992 | | 0 | -| - RE | | | |f1 | | 0.1026 | |0 | +| - RE | | | |f1 | | 0.1003 | |0 | | - p1 | | | |f1 | | 0.0998 | | 0 | | - p2 | | | |f1 | | 0.1055 | | 0 | +| - p3 | | | |f1 | | 0.0956 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt index 9b45fb0567534355d82b8b2d43856a0bfb581c56..8c591c7f0a88ced816e237245a16bdc6d688db83 100644 --- a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3444 | | 0 | | - p2 | | | |f1 | | 0.3632 | | 0 | | - p3 | | | |f1 | | 0.3444 | | 0 | -| - RE | | | |f1 | | 0.0889 | |0 | +| - RE | | | |f1 | | 0.0884 | |0 | | - p1 | | | |f1 | | 0.0734 | | 0 | | - p2 | | | |f1 | | 0.1045 | | 0 | +| - p3 | | | |f1 | | 0.0875 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt index 7b07a94f1ebd78d5e08c541a05bfdc7ed1019b8f..f5a52d295a6f2f02b23f1a057560f2abba92d1b8 100644 --- a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3558 | | 0 | | - p2 | | | |f1 | | 0.4045 | | 0 | | - p3 | | | |f1 | | 0.3558 | | 0 | -| - RE | | | |f1 | | 0.0784 | |0 | +| - RE | | | |f1 | | 0.0762 | |0 | | - p1 | | | |f1 | | 0.0787 | | 0 | | - p2 | | | |f1 | | 0.0781 | | 0 | +| - p3 | | | |f1 | | 0.0719 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt index 0643402ba5ae01bc4e4c6bd7daf9feef2b3db365..765d090e468a522437f45edb65fc5d65485264b7 100644 --- a/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1215 | | 0 | | - p2 | | | |f1 | | 0.1415 | | 0 | | - p3 | | | |f1 | | 0.1322 | | 0 | -| - RE | | | |f1 | | 0.0022 | |0 | +| - RE | | | |f1 | | 0.0031 | |0 | | - p1 | | | |f1 | | 0.0028 | | 0 | | - p2 | | | |f1 | | 0.0016 | | 0 | +| - p3 | | | |f1 | | 0.0049 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt index ff8fefd9156034f31a413c0ab9054739db228357..4edd50dc0d05a279ed9a6be3efb12660fc646344 100644 --- a/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1455 | | 0 | | - p2 | | | |f1 | | 0.1434 | | 0 | | - p3 | | | |f1 | | 0.1455 | | 0 | -| - RE | | | |f1 | | 0.0015 | |0 | +| - RE | | | |f1 | | 0.0010 | |0 | | - p1 | | | |f1 | | 0.0024 | | 0 | | - p2 | | | |f1 | | 0.0007 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt index b5cbc685ecfb0eb297165a388593a27f47e6ca5a..2b5e4935064fdb46893c4e273f664a6bc5a4bf79 100644 --- a/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1616 | | 0 | | - p2 | | | |f1 | | 0.1774 | | 0 | | - p3 | | | |f1 | | 0.1690 | | 0 | -| - RE | | | |f1 | | 0.0050 | |0 | +| - RE | | | |f1 | | 0.0048 | |0 | | - p1 | | | |f1 | | 0.0035 | | 0 | | - p2 | | | |f1 | | 0.0064 | | 0 | +| - p3 | | | |f1 | | 0.0046 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt index 75efbbc4704662e68e763f29da40673f2cc3de53..9c96e416317f7b151616c4982e8c0640322bb615 100644 --- a/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1500 | | 0 | | - p2 | | | |f1 | | 0.1548 | | 0 | | - p3 | | | |f1 | | 0.1500 | | 0 | -| - RE | | | |f1 | | 0.0031 | |0 | +| - RE | | | |f1 | | 0.0032 | |0 | | - p1 | | | |f1 | | 0.0040 | | 0 | | - p2 | | | |f1 | | 0.0023 | | 0 | +| - p3 | | | |f1 | | 0.0034 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt index 13f060a2b8a3074c4ca8104730c886f8ea6febbd..ce7ca5e76b585007a9dc187a6dd14ae6e22f17cc 100644 --- a/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1485 | | 0 | | - p2 | | | |f1 | | 0.1360 | | 0 | | - p3 | | | |f1 | | 0.1485 | | 0 | -| - RE | | | |f1 | | 0.0031 | |0 | +| - RE | | | |f1 | | 0.0027 | |0 | | - p1 | | | |f1 | | 0.0038 | | 0 | | - p2 | | | |f1 | | 0.0024 | | 0 | +| - p3 | | | |f1 | | 0.0020 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt index 48b63bcc55dff5d33703118e50cac12638711b11..97237b461fcde9621e1b414675820a8989f1add9 100644 --- a/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.1470 | | 0 | | - p2 | | | |f1 | | 0.1325 | | 0 | | - p3 | | | |f1 | | 0.1470 | | 0 | -| - RE | | | |f1 | | 0.0073 | |0 | +| - RE | | | |f1 | | 0.0080 | |0 | | - p1 | | | |f1 | | 0.0073 | | 0 | | - p2 | | | |f1 | | 0.0074 | | 0 | +| - p3 | | | |f1 | | 0.0093 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt index 2204043f611ed143de085c6daebb131f2cf429a0..a27ebddd1aefdbbdb6edd25f8352b15456cf81a7 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6091 | | 0 | | - p2 | | | |f1 | | 0.5646 | | 0 | | - p3 | | | |f1 | | 0.6243 | | 0 | -| - RE | | | |f1 | | 0.6179 | |0 | +| - RE | | | |f1 | | 0.6164 | |0 | | - p1 | | | |f1 | | 0.6332 | | 0 | | - p2 | | | |f1 | | 0.6025 | | 0 | +| - p3 | | | |f1 | | 0.6133 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt index 42934e85489839621e98169698fe948e952c8c6c..8435fa43de5b6d649de6e305295728062df17d85 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6119 | | 0 | | - p2 | | | |f1 | | 0.5847 | | 0 | | - p3 | | | |f1 | | 0.6119 | | 0 | -| - RE | | | |f1 | | 0.5993 | |0 | +| - RE | | | |f1 | | 0.6056 | |0 | | - p1 | | | |f1 | | 0.5962 | | 0 | | - p2 | | | |f1 | | 0.6024 | | 0 | +| - p3 | | | |f1 | | 0.6183 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt index 8d663f2e51718e681b0e5a34673c3eca6472d5fa..3e78b0cd1d68e177e93c6ae63d60ff33e934b1cd 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6719 | | 0 | | - p2 | | | |f1 | | 0.6327 | | 0 | | - p3 | | | |f1 | | 0.6661 | | 0 | -| - RE | | | |f1 | | 0.5882 | |0 | +| - RE | | | |f1 | | 0.5952 | |0 | | - p1 | | | |f1 | | 0.5767 | | 0 | | - p2 | | | |f1 | | 0.5998 | | 0 | +| - p3 | | | |f1 | | 0.6093 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt index d66d9aea480496a8e2a891abce6603d76f5260a4..465d16af61fd9338c7188c53fbf60f164ed3aac6 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6226 | | 0 | | - p2 | | | |f1 | | 0.5824 | | 0 | | - p3 | | | |f1 | | 0.6226 | | 0 | -| - RE | | | |f1 | | 0.5729 | |0 | +| - RE | | | |f1 | | 0.5944 | |0 | | - p1 | | | |f1 | | 0.5991 | | 0 | | - p2 | | | |f1 | | 0.5466 | | 0 | +| - p3 | | | |f1 | | 0.6375 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt index d53ea8895c02919662864752ad71582c884d2238..63b5158840c219e67fbf758e2ed730ca530afe7d 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6386 | | 0 | | - p2 | | | |f1 | | 0.6486 | | 0 | | - p3 | | | |f1 | | 0.6386 | | 0 | -| - RE | | | |f1 | | 0.5869 | |0 | +| - RE | | | |f1 | | 0.5899 | |0 | | - p1 | | | |f1 | | 0.5894 | | 0 | | - p2 | | | |f1 | | 0.5845 | | 0 | +| - p3 | | | |f1 | | 0.5959 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt index 958c2adc849a31b48cc656569846de4ca06227da..11a5d3eb944b1de7399b5736ad5127c36767eac5 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6467 | | 0 | | - p2 | | | |f1 | | 0.6178 | | 0 | | - p3 | | | |f1 | | 0.6467 | | 0 | -| - RE | | | |f1 | | 0.5865 | |0 | +| - RE | | | |f1 | | 0.5837 | |0 | | - p1 | | | |f1 | | 0.5949 | | 0 | | - p2 | | | |f1 | | 0.5782 | | 0 | +| - p3 | | | |f1 | | 0.5781 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt index b001d05a9e26c936952ee1013dcf2a2443277641..5071ef1d6625c2e56e68013ab891e5757b1af187 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5970 | | 0 | | - p2 | | | |f1 | | 0.5602 | | 0 | | - p3 | | | |f1 | | 0.6113 | | 0 | -| - RE | | | |f1 | | 0.6475 | |0 | +| - RE | | | |f1 | | 0.6440 | |0 | | - p1 | | | |f1 | | 0.6482 | | 0 | | - p2 | | | |f1 | | 0.6469 | | 0 | +| - p3 | | | |f1 | | 0.6370 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt index 9087ff1ca50e0b67c158dad6587614e551149e8a..fa6241f9b435b69937d53ca833cc5a27fa25c2c0 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6196 | | 0 | | - p2 | | | |f1 | | 0.6131 | | 0 | | - p3 | | | |f1 | | 0.6196 | | 0 | -| - RE | | | |f1 | | 0.5905 | |0 | +| - RE | | | |f1 | | 0.5840 | |0 | | - p1 | | | |f1 | | 0.5913 | | 0 | | - p2 | | | |f1 | | 0.5896 | | 0 | +| - p3 | | | |f1 | | 0.5710 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt index cc9811aa482472e85ccae9731c4d4718a8f562f1..9fd68069be248fb9602424d0ab5e675f83263e82 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6934 | | 0 | | - p2 | | | |f1 | | 0.7152 | | 0 | | - p3 | | | |f1 | | 0.6930 | | 0 | -| - RE | | | |f1 | | 0.5698 | |0 | +| - RE | | | |f1 | | 0.5641 | |0 | | - p1 | | | |f1 | | 0.5801 | | 0 | | - p2 | | | |f1 | | 0.5595 | | 0 | +| - p3 | | | |f1 | | 0.5526 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt index daa0b650060090e245eac4326b244e808fc2630b..c0657f5bc039e0ef6c46d0a9ab79ea5c33277f47 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6008 | | 0 | | - p2 | | | |f1 | | 0.6004 | | 0 | | - p3 | | | |f1 | | 0.6008 | | 0 | -| - RE | | | |f1 | | 0.5863 | |0 | +| - RE | | | |f1 | | 0.5888 | |0 | | - p1 | | | |f1 | | 0.5858 | | 0 | | - p2 | | | |f1 | | 0.5868 | | 0 | +| - p3 | | | |f1 | | 0.5938 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt index 1ccda420b821daf609bb5cd16171ae53df52940e..ccd3f8f6a3d5adfc50bb93253d2b1a2baddb48ea 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6743 | | 0 | | - p2 | | | |f1 | | 0.6673 | | 0 | | - p3 | | | |f1 | | 0.6743 | | 0 | -| - RE | | | |f1 | | 0.5659 | |0 | +| - RE | | | |f1 | | 0.5643 | |0 | | - p1 | | | |f1 | | 0.5733 | | 0 | | - p2 | | | |f1 | | 0.5586 | | 0 | +| - p3 | | | |f1 | | 0.5609 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt index 278cd40a91ad4620e4b27495dedde513b441a8b9..45927874109b49e1ce1db253c58c78ab3ea1a926 100644 --- a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6253 | | 0 | | - p2 | | | |f1 | | 0.6615 | | 0 | | - p3 | | | |f1 | | 0.6253 | | 0 | -| - RE | | | |f1 | | 0.5921 | |0 | +| - RE | | | |f1 | | 0.5727 | |0 | | - p1 | | | |f1 | | 0.5992 | | 0 | | - p2 | | | |f1 | | 0.5849 | | 0 | +| - p3 | | | |f1 | | 0.5339 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt index fc37c2a39be557639c52f60a8ec6ae1da9e32322..09d6c20e438665ebd65645a507c560c6cb20a278 100644 --- a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: | - p1 | | | |f1 | | 0.5986 | | 0 | | - p2 | | | |f1 | | 0.5593 | | 0 | | - p3 | | | |f1 | | 0.6143 | | 0 | -| - RE | | | |f1 | | 0.5206 | |0 | +| - RE | | | |f1 | | 0.5259 | |0 | | - p1 | | | |f1 | | 0.5150 | | 0 | | - p2 | | | |f1 | | 0.5261 | | 0 | +| - p3 | | | |f1 | | 0.5364 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt index 5d8438e1126afa00ac9f53c06974a76c9723dd0e..e7b5e451b837ea640578ca02095bc52a621c7ee1 100644 --- a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: | - p1 | | | |f1 | | 0.6164 | | 0 | | - p2 | | | |f1 | | 0.5669 | | 0 | | - p3 | | | |f1 | | 0.6164 | | 0 | -| - RE | | | |f1 | | 0.5112 | |0 | +| - RE | | | |f1 | | 0.5149 | |0 | | - p1 | | | |f1 | | 0.5015 | | 0 | | - p2 | | | |f1 | | 0.5209 | | 0 | +| - p3 | | | |f1 | | 0.5223 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt index bcd6e22ceb291e8b04051163bb61adbe6a7b4a6d..9dbb95dc7c82ed188142cf9ef0a295bfd27bb3e5 100644 --- a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: | - p1 | | | |f1 | | 0.6793 | | 0 | | - p2 | | | |f1 | | 0.6447 | | 0 | | - p3 | | | |f1 | | 0.6778 | | 0 | -| - RE | | | |f1 | | 0.5940 | |0 | +| - RE | | | |f1 | | 0.5982 | |0 | | - p1 | | | |f1 | | 0.6041 | | 0 | | - p2 | | | |f1 | | 0.5838 | | 0 | +| - p3 | | | |f1 | | 0.6065 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt index cb0f5ba822a9b89ddfa3fd007d30529c8bc1a93d..9f116f8c7deee3f443c689514bb8a23fdb8d305c 100644 --- a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: | - p1 | | | |f1 | | 0.6276 | | 0 | | - p2 | | | |f1 | | 0.5803 | | 0 | | - p3 | | | |f1 | | 0.6276 | | 0 | -| - RE | | | |f1 | | 0.5151 | |0 | +| - RE | | | |f1 | | 0.5166 | |0 | | - p1 | | | |f1 | | 0.5103 | | 0 | | - p2 | | | |f1 | | 0.5200 | | 0 | +| - p3 | | | |f1 | | 0.5195 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt index 1f65d2dec6b3e36d1d623e101a901e98be8a42ee..652672223f87eeb324263928437a787e75b87b20 100644 --- a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: | - p1 | | | |f1 | | 0.6085 | | 0 | | - p2 | | | |f1 | | 0.5919 | | 0 | | - p3 | | | |f1 | | 0.6085 | | 0 | -| - RE | | | |f1 | | 0.4972 | |0 | +| - RE | | | |f1 | | 0.5106 | |0 | | - p1 | | | |f1 | | 0.4920 | | 0 | | - p2 | | | |f1 | | 0.5025 | | 0 | +| - p3 | | | |f1 | | 0.5373 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt index ab62dfffd44ae57f61f2f3a699bdbaf5e4c965da..827b5e0d0dd790eea628cc4c77b18800829dd3d5 100644 --- a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: | - p1 | | | |f1 | | 0.6615 | | 0 | | - p2 | | | |f1 | | 0.5944 | | 0 | | - p3 | | | |f1 | | 0.6615 | | 0 | -| - RE | | | |f1 | | 0.5319 | |0 | +| - RE | | | |f1 | | 0.5356 | |0 | | - p1 | | | |f1 | | 0.5062 | | 0 | | - p2 | | | |f1 | | 0.5576 | | 0 | +| - p3 | | | |f1 | | 0.5429 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt index 023290e171283c80a01550d0c36944a84da64c7c..d1353488cb49ca2c17d3d3e38c7a9b5efe1528a4 100644 --- a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batc | - p1 | | | |f1 | | 0.6024 | | 0 | | - p2 | | | |f1 | | 0.5929 | | 0 | | - p3 | | | |f1 | | 0.5935 | | 0 | -| - RE | | | |f1 | | 0.5195 | |0 | +| - RE | | | |f1 | | 0.5221 | |0 | | - p1 | | | |f1 | | 0.5191 | | 0 | | - p2 | | | |f1 | | 0.5199 | | 0 | +| - p3 | | | |f1 | | 0.5273 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt index c0a0adcf0ce098d34161ff1141bd34bb2575d3f5..d10be9c5334b54f3adcb1cee0c3d5a9defc21084 100644 --- a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batc | - p1 | | | |f1 | | 0.5928 | | 0 | | - p2 | | | |f1 | | 0.5796 | | 0 | | - p3 | | | |f1 | | 0.5928 | | 0 | -| - RE | | | |f1 | | 0.4338 | |0 | +| - RE | | | |f1 | | 0.4415 | |0 | | - p1 | | | |f1 | | 0.4467 | | 0 | | - p2 | | | |f1 | | 0.4210 | | 0 | +| - p3 | | | |f1 | | 0.4569 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt index 96e35e56e52b93e51041be086d3cf5cba46d9906..df97d37a91573d214913b317e4b83ba9899e389a 100644 --- a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batc | - p1 | | | |f1 | | 0.6982 | | 0 | | - p2 | | | |f1 | | 0.6679 | | 0 | | - p3 | | | |f1 | | 0.6930 | | 0 | -| - RE | | | |f1 | | 0.5536 | |0 | +| - RE | | | |f1 | | 0.5530 | |0 | | - p1 | | | |f1 | | 0.5546 | | 0 | | - p2 | | | |f1 | | 0.5526 | | 0 | +| - p3 | | | |f1 | | 0.5518 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt index cade308716b27c7c7d58a10a3bcce1456d819301..7bbdde1853e3107c0e3fa26a80be768aedf20a06 100644 --- a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batc | - p1 | | | |f1 | | 0.6214 | | 0 | | - p2 | | | |f1 | | 0.6140 | | 0 | | - p3 | | | |f1 | | 0.6214 | | 0 | -| - RE | | | |f1 | | 0.4996 | |0 | +| - RE | | | |f1 | | 0.5023 | |0 | | - p1 | | | |f1 | | 0.4863 | | 0 | | - p2 | | | |f1 | | 0.5129 | | 0 | +| - p3 | | | |f1 | | 0.5076 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt index ae309c6d90bb3718e41b93015b2729d95d7432a0..6516944fdac3db3ed3380f5c97391fae7dbc061d 100644 --- a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batc | - p1 | | | |f1 | | 0.6347 | | 0 | | - p2 | | | |f1 | | 0.6211 | | 0 | | - p3 | | | |f1 | | 0.6347 | | 0 | -| - RE | | | |f1 | | 0.4625 | |0 | +| - RE | | | |f1 | | 0.4646 | |0 | | - p1 | | | |f1 | | 0.4799 | | 0 | | - p2 | | | |f1 | | 0.4451 | | 0 | +| - p3 | | | |f1 | | 0.4689 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt index 90e4a5b6c1e518cabb5f827b9b9773f921f5a271..698e2379856e9df40de4014fdbd473b61395c81b 100644 --- a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batc | - p1 | | | |f1 | | 0.6015 | | 0 | | - p2 | | | |f1 | | 0.6049 | | 0 | | - p3 | | | |f1 | | 0.6015 | | 0 | -| - RE | | | |f1 | | 0.4905 | |0 | +| - RE | | | |f1 | | 0.4911 | |0 | | - p1 | | | |f1 | | 0.5137 | | 0 | | - p2 | | | |f1 | | 0.4674 | | 0 | +| - p3 | | | |f1 | | 0.4923 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt index c78145b726a3287ad71bdbf95898e5bcf690cec5..45de7e54ed94cfb5fcc9ab2c265f192e7aa9f981 100644 --- a/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt +++ b/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.0803 | | 0 | | - p2 | | | |f1 | | 0.1479 | | 0 | | - p3 | | | |f1 | | 0.1454 | | 0 | -| - RE | | | |f1 | | 0.0707 | |0 | +| - RE | | | |f1 | | 0.0692 | |0 | | - p1 | | | |f1 | | 0.0722 | | 0 | | - p2 | | | |f1 | | 0.0692 | | 0 | +| - p3 | | | |f1 | | 0.0663 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt index f84248bfeea07c329a9316ef806317b13694de28..87b319e4253b8aba65bfcf2e4ade2615fc2ae10e 100644 --- a/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt +++ b/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt @@ -8,3 +8,4 @@ hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 | - RE | | | |f1 | | 0.0000 | |0 | | - p1 | | | |f1 | | 0.0000 | | 0 | | - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt index e4f711b8934a50927b9c80e4173b530029a20a83..b2aa7fd7dc8637dbd14ef01f078eceecddd04f15 100644 --- a/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt +++ b/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.2991 | | 0 | | - p2 | | | |f1 | | 0.3563 | | 0 | | - p3 | | | |f1 | | 0.3311 | | 0 | -| - RE | | | |f1 | | 0.0859 | |0 | +| - RE | | | |f1 | | 0.0896 | |0 | | - p1 | | | |f1 | | 0.0832 | | 0 | | - p2 | | | |f1 | | 0.0887 | | 0 | +| - p3 | | | |f1 | | 0.0968 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt index 8a5ce7fd170f07cb5e4701da5f07a94823b77556..14675a45035d0e69895142e3b0f6800ec9197583 100644 --- a/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt +++ b/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3184 | | 0 | | - p2 | | | |f1 | | 0.3297 | | 0 | | - p3 | | | |f1 | | 0.3184 | | 0 | -| - RE | | | |f1 | | 0.0497 | |0 | +| - RE | | | |f1 | | 0.0510 | |0 | | - p1 | | | |f1 | | 0.0533 | | 0 | | - p2 | | | |f1 | | 0.0461 | | 0 | +| - p3 | | | |f1 | | 0.0535 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt index ab4b2789448393a410110ed0c9552f2282a33d44..bad9a6c35cda030096e0a1ffe1e020b004d5263a 100644 --- a/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt +++ b/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3004 | | 0 | | - p2 | | | |f1 | | 0.2970 | | 0 | | - p3 | | | |f1 | | 0.3004 | | 0 | -| - RE | | | |f1 | | 0.0419 | |0 | +| - RE | | | |f1 | | 0.0404 | |0 | | - p1 | | | |f1 | | 0.0445 | | 0 | | - p2 | | | |f1 | | 0.0393 | | 0 | +| - p3 | | | |f1 | | 0.0375 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt index d715ee1da72e5acea32d9826df9b4a6f158e0133..eab52b0c7040bdf63e365ec759ff69b327922c10 100644 --- a/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt +++ b/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3119 | | 0 | | - p2 | | | |f1 | | 0.2916 | | 0 | | - p3 | | | |f1 | | 0.3119 | | 0 | -| - RE | | | |f1 | | 0.0489 | |0 | +| - RE | | | |f1 | | 0.0502 | |0 | | - p1 | | | |f1 | | 0.0477 | | 0 | | - p2 | | | |f1 | | 0.0501 | | 0 | +| - p3 | | | |f1 | | 0.0528 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt index 351987c72f1460be7fa2757c9bab541d58d86046..fd9ea572467adfa2c454f1539b60635facd6d39f 100644 --- a/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt +++ b/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6200 | | 0 | | - p2 | | | |f1 | | 0.5639 | | 0 | | - p3 | | | |f1 | | 0.5918 | | 0 | -| - RE | | | |f1 | | 0.5250 | |0 | +| - RE | | | |f1 | | 0.5303 | |0 | | - p1 | | | |f1 | | 0.5163 | | 0 | | - p2 | | | |f1 | | 0.5337 | | 0 | +| - p3 | | | |f1 | | 0.5409 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt index ab0b09b144a5d00d514bc49444e9f2f2ede3b715..739bad8c7a5639671141f53c0413696e38d96592 100644 --- a/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt +++ b/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6083 | | 0 | | - p2 | | | |f1 | | 0.5663 | | 0 | | - p3 | | | |f1 | | 0.6083 | | 0 | -| - RE | | | |f1 | | 0.5020 | |0 | +| - RE | | | |f1 | | 0.5162 | |0 | | - p1 | | | |f1 | | 0.5070 | | 0 | | - p2 | | | |f1 | | 0.4971 | | 0 | +| - p3 | | | |f1 | | 0.5444 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt index 8f5b46311c3fb129a2ef013321002e72081ad6dc..51580ad2fec3d1b525363e7391cd794ab01ea59f 100644 --- a/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt +++ b/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6910 | | 0 | | - p2 | | | |f1 | | 0.6643 | | 0 | | - p3 | | | |f1 | | 0.6569 | | 0 | -| - RE | | | |f1 | | 0.5162 | |0 | +| - RE | | | |f1 | | 0.5209 | |0 | | - p1 | | | |f1 | | 0.4958 | | 0 | | - p2 | | | |f1 | | 0.5365 | | 0 | +| - p3 | | | |f1 | | 0.5305 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt index 76dc18d9929e7897f0ced0ae101718ebb60d6228..5ac1adba3779f7f68d4bfbf2cf88b163e3b84f4b 100644 --- a/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt +++ b/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5908 | | 0 | | - p2 | | | |f1 | | 0.5862 | | 0 | | - p3 | | | |f1 | | 0.5908 | | 0 | -| - RE | | | |f1 | | 0.4988 | |0 | +| - RE | | | |f1 | | 0.5033 | |0 | | - p1 | | | |f1 | | 0.5168 | | 0 | | - p2 | | | |f1 | | 0.4808 | | 0 | +| - p3 | | | |f1 | | 0.5124 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt index b0e88ea7e76efcab0b135cd8a95fc77858475755..b909c3988f118020a3d985678e092a54be2f61f1 100644 --- a/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt +++ b/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6141 | | 0 | | - p2 | | | |f1 | | 0.6122 | | 0 | | - p3 | | | |f1 | | 0.6141 | | 0 | -| - RE | | | |f1 | | 0.4953 | |0 | +| - RE | | | |f1 | | 0.5007 | |0 | | - p1 | | | |f1 | | 0.5153 | | 0 | | - p2 | | | |f1 | | 0.4754 | | 0 | +| - p3 | | | |f1 | | 0.5114 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt index e61099f796ffbeb0600dc326a97cf20aed09ea52..e956839ba21e57d9f60059817766b32fe88d80a2 100644 --- a/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt +++ b/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6365 | | 0 | | - p2 | | | |f1 | | 0.5737 | | 0 | | - p3 | | | |f1 | | 0.6365 | | 0 | -| - RE | | | |f1 | | 0.4839 | |0 | +| - RE | | | |f1 | | 0.4883 | |0 | | - p1 | | | |f1 | | 0.4801 | | 0 | | - p2 | | | |f1 | | 0.4878 | | 0 | +| - p3 | | | |f1 | | 0.4972 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt index 43fe469ea2dced95ed141070f26d70b56e807bab..9b6e27e1e65dc4e4d80d829f031471763c651a1f 100644 --- a/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt +++ b/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6160 | | 0 | | - p2 | | | |f1 | | 0.6308 | | 0 | | - p3 | | | |f1 | | 0.6094 | | 0 | -| - RE | | | |f1 | | 0.5395 | |0 | +| - RE | | | |f1 | | 0.5518 | |0 | | - p1 | | | |f1 | | 0.5191 | | 0 | | - p2 | | | |f1 | | 0.5600 | | 0 | +| - p3 | | | |f1 | | 0.5764 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt index 457c5af3392b963073156197290202a23e90058a..f1b60273ff3047ec635fd913fc4fe0db8a2ca133 100644 --- a/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt +++ b/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6551 | | 0 | | - p2 | | | |f1 | | 0.6608 | | 0 | | - p3 | | | |f1 | | 0.6551 | | 0 | -| - RE | | | |f1 | | 0.5316 | |0 | +| - RE | | | |f1 | | 0.5405 | |0 | | - p1 | | | |f1 | | 0.5083 | | 0 | | - p2 | | | |f1 | | 0.5550 | | 0 | +| - p3 | | | |f1 | | 0.5581 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt index 7f1e9de6444c6822209bea393633e24162dd2816..f498e38336fb8c77988c54dcd070e31a0ac9a220 100644 --- a/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt +++ b/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.7142 | | 0 | | - p2 | | | |f1 | | 0.6992 | | 0 | | - p3 | | | |f1 | | 0.7212 | | 0 | -| - RE | | | |f1 | | 0.5530 | |0 | +| - RE | | | |f1 | | 0.5615 | |0 | | - p1 | | | |f1 | | 0.5223 | | 0 | | - p2 | | | |f1 | | 0.5837 | | 0 | +| - p3 | | | |f1 | | 0.5786 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt index a25d9505896b4fc63b8e9ae3f6bc0d40a45a5073..9eed39172f7a13688201364ca71ab665a6378bda 100644 --- a/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt +++ b/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6591 | | 0 | | - p2 | | | |f1 | | 0.6672 | | 0 | | - p3 | | | |f1 | | 0.6591 | | 0 | -| - RE | | | |f1 | | 0.5698 | |0 | +| - RE | | | |f1 | | 0.5592 | |0 | | - p1 | | | |f1 | | 0.5795 | | 0 | | - p2 | | | |f1 | | 0.5601 | | 0 | +| - p3 | | | |f1 | | 0.5380 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt index 53cb93e5f16fdc5e547385866930d8dafa6007ab..2f8297965f5ccbab4e4581425fdb9d9628f5cc8c 100644 --- a/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt +++ b/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6737 | | 0 | | - p2 | | | |f1 | | 0.6885 | | 0 | | - p3 | | | |f1 | | 0.6737 | | 0 | -| - RE | | | |f1 | | 0.5091 | |0 | +| - RE | | | |f1 | | 0.5095 | |0 | | - p1 | | | |f1 | | 0.5121 | | 0 | | - p2 | | | |f1 | | 0.5061 | | 0 | +| - p3 | | | |f1 | | 0.5103 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt index 5f8181907d3a7905338e8c0ad0828529f0ff2547..ed1c6926d8cdf2d22c23adf3393d14f1da9cd4d9 100644 --- a/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt +++ b/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6750 | | 0 | | - p2 | | | |f1 | | 0.6918 | | 0 | | - p3 | | | |f1 | | 0.6750 | | 0 | -| - RE | | | |f1 | | 0.4926 | |0 | +| - RE | | | |f1 | | 0.4999 | |0 | | - p1 | | | |f1 | | 0.5149 | | 0 | | - p2 | | | |f1 | | 0.4703 | | 0 | +| - p3 | | | |f1 | | 0.5145 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt index 2726400de0d4cf107d5b71b31accc1dbd0d13faa..0166f0aea940a1e11dbe79c1f2cf729f6c18c859 100644 --- a/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt +++ b/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6355 | | 0 | | - p2 | | | |f1 | | 0.6161 | | 0 | | - p3 | | | |f1 | | 0.6455 | | 0 | -| - RE | | | |f1 | | 0.5528 | |0 | +| - RE | | | |f1 | | 0.5540 | |0 | | - p1 | | | |f1 | | 0.5562 | | 0 | | - p2 | | | |f1 | | 0.5494 | | 0 | +| - p3 | | | |f1 | | 0.5565 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt index 80628c6127c1c44d5afd4041186db642fd700903..70f88e2528d3a1ff7cf33f95ceb671fc00a7aa14 100644 --- a/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt +++ b/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6836 | | 0 | | - p2 | | | |f1 | | 0.6846 | | 0 | | - p3 | | | |f1 | | 0.6836 | | 0 | -| - RE | | | |f1 | | 0.5629 | |0 | +| - RE | | | |f1 | | 0.5680 | |0 | | - p1 | | | |f1 | | 0.5392 | | 0 | | - p2 | | | |f1 | | 0.5867 | | 0 | +| - p3 | | | |f1 | | 0.5780 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt index 0c1913d2e7a4604f2927eecbe6068fbd0bd705c0..de0f1bbac03a767e292f5c6d52f4319e367c44c0 100644 --- a/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt +++ b/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt @@ -4,6 +4,7 @@ hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 | - NER | | | |f1 | | 0.7133 | |0 | | - p1 | | | |f1 | | 0.7262 | | 0 | | - p2 | | | |f1 | | 0.7005 | | 0 | -| - RE | | | |f1 | | 0.6077 | |0 | +| - RE | | | |f1 | | 0.5960 | |0 | | - p1 | | | |f1 | | 0.5919 | | 0 | | - p2 | | | |f1 | | 0.6235 | | 0 | +| - p3 | | | |f1 | | 0.5726 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt index 010a5ff0b33d4b65dd13caa21d97532e037456b6..0276a07fab4408898651f3f147a274d5d5df3c97 100644 --- a/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt +++ b/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6829 | | 0 | | - p2 | | | |f1 | | 0.6715 | | 0 | | - p3 | | | |f1 | | 0.6829 | | 0 | -| - RE | | | |f1 | | 0.6036 | |0 | +| - RE | | | |f1 | | 0.5997 | |0 | | - p1 | | | |f1 | | 0.5940 | | 0 | | - p2 | | | |f1 | | 0.6133 | | 0 | +| - p3 | | | |f1 | | 0.5918 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt index b922bfdfb8933cb22685f49680d32d1c37a1bcc3..003be3f4ed88a6a499b89ed958c415cc485b70c3 100644 --- a/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt +++ b/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.7143 | | 0 | | - p2 | | | |f1 | | 0.7127 | | 0 | | - p3 | | | |f1 | | 0.7143 | | 0 | -| - RE | | | |f1 | | 0.5149 | |0 | +| - RE | | | |f1 | | 0.5156 | |0 | | - p1 | | | |f1 | | 0.5111 | | 0 | | - p2 | | | |f1 | | 0.5188 | | 0 | +| - p3 | | | |f1 | | 0.5171 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt index 0d4a308afc878743ade464fb5bf07b97afd7eb08..2c8ad321754c222e65360614da1f6192f3387c7c 100644 --- a/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt +++ b/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6947 | | 0 | | - p2 | | | |f1 | | 0.6765 | | 0 | | - p3 | | | |f1 | | 0.6947 | | 0 | -| - RE | | | |f1 | | 0.5457 | |0 | +| - RE | | | |f1 | | 0.5469 | |0 | | - p1 | | | |f1 | | 0.5323 | | 0 | | - p2 | | | |f1 | | 0.5590 | | 0 | +| - p3 | | | |f1 | | 0.5494 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt index 180216d0234f301db7e5a762d9f6a7e37cd94de1..4bc79186d7029b363381303d141376c728294ab7 100644 --- a/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt +++ b/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4833 | | 0 | | - p2 | | | |f1 | | 0.5005 | | 0 | | - p3 | | | |f1 | | 0.4951 | | 0 | -| - RE | | | |f1 | | 0.1101 | |0 | +| - RE | | | |f1 | | 0.1198 | |0 | | - p1 | | | |f1 | | 0.0964 | | 0 | | - p2 | | | |f1 | | 0.1237 | | 0 | +| - p3 | | | |f1 | | 0.1391 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt index bebb267aed9aaf923d6e59b0a1667ba1dfc57dec..4352edec2156fd74172ffb93bfc7069ed935cce2 100644 --- a/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt +++ b/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4910 | | 0 | | - p2 | | | |f1 | | 0.5039 | | 0 | | - p3 | | | |f1 | | 0.4910 | | 0 | -| - RE | | | |f1 | | 0.1404 | |0 | +| - RE | | | |f1 | | 0.1453 | |0 | | - p1 | | | |f1 | | 0.1204 | | 0 | | - p2 | | | |f1 | | 0.1605 | | 0 | +| - p3 | | | |f1 | | 0.1551 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt index 6df38e34e2c0e0494740deb52f9f431147bf03aa..4b3a8e23520bec5746d9090322128b48cf1af41f 100644 --- a/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt +++ b/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5633 | | 0 | | - p2 | | | |f1 | | 0.5377 | | 0 | | - p3 | | | |f1 | | 0.5352 | | 0 | -| - RE | | | |f1 | | 0.1754 | |0 | +| - RE | | | |f1 | | 0.1753 | |0 | | - p1 | | | |f1 | | 0.1592 | | 0 | | - p2 | | | |f1 | | 0.1917 | | 0 | +| - p3 | | | |f1 | | 0.1751 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt index 117950740427fbfafee82d61b9becd177292a9ad..79c82263014a0069b6c825385d95cf6477004a4a 100644 --- a/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt +++ b/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5186 | | 0 | | - p2 | | | |f1 | | 0.5206 | | 0 | | - p3 | | | |f1 | | 0.5186 | | 0 | -| - RE | | | |f1 | | 0.1084 | |0 | +| - RE | | | |f1 | | 0.1055 | |0 | | - p1 | | | |f1 | | 0.1171 | | 0 | | - p2 | | | |f1 | | 0.0997 | | 0 | +| - p3 | | | |f1 | | 0.0997 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt index c4a399c079cfe7554d699384725ae9367de34a56..19c6346c5007538093d4b83f945e14ee4616490c 100644 --- a/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt +++ b/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4756 | | 0 | | - p2 | | | |f1 | | 0.4449 | | 0 | | - p3 | | | |f1 | | 0.4756 | | 0 | -| - RE | | | |f1 | | 0.1052 | |0 | +| - RE | | | |f1 | | 0.1035 | |0 | | - p1 | | | |f1 | | 0.1095 | | 0 | | - p2 | | | |f1 | | 0.1009 | | 0 | +| - p3 | | | |f1 | | 0.1000 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt index 8df050cf3c3f47a57abad65d937574315911ddf4..cc424f90dde2288be7dda70e93c0e761287409da 100644 --- a/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt +++ b/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5117 | | 0 | | - p2 | | | |f1 | | 0.4955 | | 0 | | - p3 | | | |f1 | | 0.5117 | | 0 | -| - RE | | | |f1 | | 0.1139 | |0 | +| - RE | | | |f1 | | 0.1260 | |0 | | - p1 | | | |f1 | | 0.1178 | | 0 | | - p2 | | | |f1 | | 0.1101 | | 0 | +| - p3 | | | |f1 | | 0.1501 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt index c216f0690aa0227aba457157c2a34bef6d921ac4..fb75acf6585f103c2dc138b976cb9d036ecee6c6 100644 --- a/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5009 | | 0 | | - p2 | | | |f1 | | 0.4966 | | 0 | | - p3 | | | |f1 | | 0.5049 | | 0 | -| - RE | | | |f1 | | 0.1135 | |0 | +| - RE | | | |f1 | | 0.1125 | |0 | | - p1 | | | |f1 | | 0.1175 | | 0 | | - p2 | | | |f1 | | 0.1095 | | 0 | +| - p3 | | | |f1 | | 0.1107 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt index f85b5b01881d041a71a80c22c055b2f4f726d17b..eba564920c2c92d7800080552c6a59b8def8c9b7 100644 --- a/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3375 | | 0 | | - p2 | | | |f1 | | 0.3403 | | 0 | | - p3 | | | |f1 | | 0.3375 | | 0 | -| - RE | | | |f1 | | 0.0554 | |0 | +| - RE | | | |f1 | | 0.0606 | |0 | | - p1 | | | |f1 | | 0.0427 | | 0 | | - p2 | | | |f1 | | 0.0681 | | 0 | +| - p3 | | | |f1 | | 0.0711 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt index 6e7923466ffd7382d6490aa51f180f788ee9f254..2c42fd3d91bc50bf07de543792051bd0fbd08f0c 100644 --- a/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5195 | | 0 | | - p2 | | | |f1 | | 0.5301 | | 0 | | - p3 | | | |f1 | | 0.5275 | | 0 | -| - RE | | | |f1 | | 0.1537 | |0 | +| - RE | | | |f1 | | 0.1499 | |0 | | - p1 | | | |f1 | | 0.2114 | | 0 | | - p2 | | | |f1 | | 0.0961 | | 0 | +| - p3 | | | |f1 | | 0.1422 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt index cd0993715e778d635e21a2d48302a68e2d8fff45..4fc3f8a0bc1e4a0d38f2f3729779f31828e7b70b 100644 --- a/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3913 | | 0 | | - p2 | | | |f1 | | 0.4132 | | 0 | | - p3 | | | |f1 | | 0.3913 | | 0 | -| - RE | | | |f1 | | 0.1231 | |0 | +| - RE | | | |f1 | | 0.1366 | |0 | | - p1 | | | |f1 | | 0.1255 | | 0 | | - p2 | | | |f1 | | 0.1207 | | 0 | +| - p3 | | | |f1 | | 0.1636 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt index 8d86fca74d2ca6322c59aba663d1d8f45ed9574c..b031c9c0bd0f5fe6669a53563e3681aa1a74d890 100644 --- a/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4106 | | 0 | | - p2 | | | |f1 | | 0.3861 | | 0 | | - p3 | | | |f1 | | 0.4106 | | 0 | -| - RE | | | |f1 | | 0.0557 | |0 | +| - RE | | | |f1 | | 0.0613 | |0 | | - p1 | | | |f1 | | 0.0509 | | 0 | | - p2 | | | |f1 | | 0.0606 | | 0 | +| - p3 | | | |f1 | | 0.0724 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt index e5b15c80f43fae57630478f1bf1b89e3c6282eb5..891a18854ac34e549e2ead223a4f5c50fa589fb3 100644 --- a/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4036 | | 0 | | - p2 | | | |f1 | | 0.3990 | | 0 | | - p3 | | | |f1 | | 0.4036 | | 0 | -| - RE | | | |f1 | | 0.0752 | |0 | +| - RE | | | |f1 | | 0.0748 | |0 | | - p1 | | | |f1 | | 0.0829 | | 0 | | - p2 | | | |f1 | | 0.0674 | | 0 | +| - p3 | | | |f1 | | 0.0742 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt index a20429567b560cbd1749d067fa745eba84f3e883..91d54c4d91a393447e1a9033fe06a44c2ea83264 100644 --- a/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5357 | | 0 | | - p2 | | | |f1 | | 0.5227 | | 0 | | - p3 | | | |f1 | | 0.5063 | | 0 | -| - RE | | | |f1 | | 0.1660 | |0 | +| - RE | | | |f1 | | 0.1719 | |0 | | - p1 | | | |f1 | | 0.1432 | | 0 | | - p2 | | | |f1 | | 0.1888 | | 0 | +| - p3 | | | |f1 | | 0.1836 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt index db8d90da14db73ebb33902ddd73b9c6076443499..116bb08deaa20d0ae0a961c362d4802b12d2add2 100644 --- a/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.2822 | | 0 | | - p2 | | | |f1 | | 0.2999 | | 0 | | - p3 | | | |f1 | | 0.2822 | | 0 | -| - RE | | | |f1 | | 0.0625 | |0 | +| - RE | | | |f1 | | 0.0675 | |0 | | - p1 | | | |f1 | | 0.0576 | | 0 | | - p2 | | | |f1 | | 0.0674 | | 0 | +| - p3 | | | |f1 | | 0.0777 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt index 01022afe783fed1ed37573dd0f37631ccc4de471..751b2811277dbd2d73fca6f47f097140e427c007 100644 --- a/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5729 | | 0 | | - p2 | | | |f1 | | 0.5627 | | 0 | | - p3 | | | |f1 | | 0.5790 | | 0 | -| - RE | | | |f1 | | 0.2590 | |0 | +| - RE | | | |f1 | | 0.2679 | |0 | | - p1 | | | |f1 | | 0.2873 | | 0 | | - p2 | | | |f1 | | 0.2307 | | 0 | +| - p3 | | | |f1 | | 0.2858 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt index 23e823098e1230f9205e25b581ba66d1e9604b8d..72e10fb742c76990d605ee3a2c3c4ef35b670091 100644 --- a/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4417 | | 0 | | - p2 | | | |f1 | | 0.4506 | | 0 | | - p3 | | | |f1 | | 0.4417 | | 0 | -| - RE | | | |f1 | | 0.2105 | |0 | +| - RE | | | |f1 | | 0.2291 | |0 | | - p1 | | | |f1 | | 0.1525 | | 0 | | - p2 | | | |f1 | | 0.2686 | | 0 | +| - p3 | | | |f1 | | 0.2662 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt index 4ca356c3fa0d0d2fbfc49bd6e1fb5c22a178641f..2d69cd7d7928f4a6c643567b723646b0ea9b62cc 100644 --- a/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4327 | | 0 | | - p2 | | | |f1 | | 0.4023 | | 0 | | - p3 | | | |f1 | | 0.4327 | | 0 | -| - RE | | | |f1 | | 0.1233 | |0 | +| - RE | | | |f1 | | 0.1313 | |0 | | - p1 | | | |f1 | | 0.1070 | | 0 | | - p2 | | | |f1 | | 0.1395 | | 0 | +| - p3 | | | |f1 | | 0.1473 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt index 4da54e27448814e22ec38a41151303a4f516855d..679149a7bff8efd0e260ce4a2032de17a86487c6 100644 --- a/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3973 | | 0 | | - p2 | | | |f1 | | 0.3564 | | 0 | | - p3 | | | |f1 | | 0.3973 | | 0 | -| - RE | | | |f1 | | 0.1312 | |0 | +| - RE | | | |f1 | | 0.1550 | |0 | | - p1 | | | |f1 | | 0.1155 | | 0 | | - p2 | | | |f1 | | 0.1468 | | 0 | +| - p3 | | | |f1 | | 0.2027 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt index c7c232863aaa58475d4e1a87093a767da7537dd1..e691afc144b5537fc7c41225a0f9323b2628b66e 100644 --- a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size | - p1 | | | |f1 | | 0.4725 | | 0 | | - p2 | | | |f1 | | 0.4730 | | 0 | | - p3 | | | |f1 | | 0.4805 | | 0 | -| - RE | | | |f1 | | 0.3314 | |0 | +| - RE | | | |f1 | | 0.3592 | |0 | | - p1 | | | |f1 | | 0.2593 | | 0 | | - p2 | | | |f1 | | 0.4034 | | 0 | +| - p3 | | | |f1 | | 0.4148 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt index 6542217f570cf2c774a5d8e6774ce8494e824f0d..b7051f2deb4230647fb3c9bbe0580a2fe84de6d8 100644 --- a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size | - p1 | | | |f1 | | 0.3498 | | 0 | | - p2 | | | |f1 | | 0.3648 | | 0 | | - p3 | | | |f1 | | 0.3498 | | 0 | -| - RE | | | |f1 | | 0.1699 | |0 | +| - RE | | | |f1 | | 0.1862 | |0 | | - p1 | | | |f1 | | 0.1055 | | 0 | | - p2 | | | |f1 | | 0.2343 | | 0 | +| - p3 | | | |f1 | | 0.2189 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt index 32975b7fe989ee9b90d1fe8f17345424a3c63029..8a1dc9d3cb03c6344b978ea84a9310c9d638cfb5 100644 --- a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size | - p1 | | | |f1 | | 0.5147 | | 0 | | - p2 | | | |f1 | | 0.5232 | | 0 | | - p3 | | | |f1 | | 0.5149 | | 0 | -| - RE | | | |f1 | | 0.3811 | |0 | +| - RE | | | |f1 | | 0.3958 | |0 | | - p1 | | | |f1 | | 0.3092 | | 0 | | - p2 | | | |f1 | | 0.4530 | | 0 | +| - p3 | | | |f1 | | 0.4252 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt index 98999dec048efd933959ae747c011243fa195858..0a825a34d1e4073d2ae5da7e22e86582b980912c 100644 --- a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size | - p1 | | | |f1 | | 0.4911 | | 0 | | - p2 | | | |f1 | | 0.5046 | | 0 | | - p3 | | | |f1 | | 0.4911 | | 0 | -| - RE | | | |f1 | | 0.3603 | |0 | +| - RE | | | |f1 | | 0.3296 | |0 | | - p1 | | | |f1 | | 0.3895 | | 0 | | - p2 | | | |f1 | | 0.3311 | | 0 | +| - p3 | | | |f1 | | 0.2683 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt index 66c2604baed07981da5e7d253b0692ca6e103aa1..3952afe3c0317b08c9e06f3caff5ae01eb9aa4e2 100644 --- a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size | - p1 | | | |f1 | | 0.4029 | | 0 | | - p2 | | | |f1 | | 0.3794 | | 0 | | - p3 | | | |f1 | | 0.4029 | | 0 | -| - RE | | | |f1 | | 0.2051 | |0 | +| - RE | | | |f1 | | 0.2132 | |0 | | - p1 | | | |f1 | | 0.2155 | | 0 | | - p2 | | | |f1 | | 0.1948 | | 0 | +| - p3 | | | |f1 | | 0.2293 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt index 21d83809b381556619ff7e5ab51516c24dfe0864..c0a21e1f1e602067347fb6c7ae3af7c47c220eb9 100644 --- a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size | - p1 | | | |f1 | | 0.4204 | | 0 | | - p2 | | | |f1 | | 0.4174 | | 0 | | - p3 | | | |f1 | | 0.4204 | | 0 | -| - RE | | | |f1 | | 0.1970 | |0 | +| - RE | | | |f1 | | 0.2018 | |0 | | - p1 | | | |f1 | | 0.1990 | | 0 | | - p2 | | | |f1 | | 0.1950 | | 0 | +| - p3 | | | |f1 | | 0.2115 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt index 7f83d34bd21324ddddf4507bb0dfc82a46ef1910..7e232099737212c32a37983c482f6e7bf1aee5d9 100644 --- a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_si | - p1 | | | |f1 | | 0.5777 | | 0 | | - p2 | | | |f1 | | 0.5841 | | 0 | | - p3 | | | |f1 | | 0.5668 | | 0 | -| - RE | | | |f1 | | 0.4245 | |0 | +| - RE | | | |f1 | | 0.4313 | |0 | | - p1 | | | |f1 | | 0.3482 | | 0 | | - p2 | | | |f1 | | 0.5008 | | 0 | +| - p3 | | | |f1 | | 0.4449 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt index 5305a935ac66ea3557ea860f92a81e4b3b7e3a7f..5c23dd02177855715602246da1ab145d4750a511 100644 --- a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_si | - p1 | | | |f1 | | 0.5081 | | 0 | | - p2 | | | |f1 | | 0.4988 | | 0 | | - p3 | | | |f1 | | 0.5081 | | 0 | -| - RE | | | |f1 | | 0.2162 | |0 | +| - RE | | | |f1 | | 0.2549 | |0 | | - p1 | | | |f1 | | 0.2029 | | 0 | | - p2 | | | |f1 | | 0.2296 | | 0 | +| - p3 | | | |f1 | | 0.3323 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt index 426c121a4b88ebed985cd20d4010f2d266160a1e..dc5b1148df6fbd0d32f982b72350b9baed9e392d 100644 --- a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_si | - p1 | | | |f1 | | 0.6430 | | 0 | | - p2 | | | |f1 | | 0.6437 | | 0 | | - p3 | | | |f1 | | 0.6457 | | 0 | -| - RE | | | |f1 | | 0.3404 | |0 | +| - RE | | | |f1 | | 0.3556 | |0 | | - p1 | | | |f1 | | 0.2708 | | 0 | | - p2 | | | |f1 | | 0.4099 | | 0 | +| - p3 | | | |f1 | | 0.3860 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt index aaab1bc243dd956c4437b12dcbad9c52cd405ac2..824dea14ef26e4fd07539f5cdc57cb0d72d7a869 100644 --- a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_si | - p1 | | | |f1 | | 0.5352 | | 0 | | - p2 | | | |f1 | | 0.5421 | | 0 | | - p3 | | | |f1 | | 0.5352 | | 0 | -| - RE | | | |f1 | | 0.1859 | |0 | +| - RE | | | |f1 | | 0.1906 | |0 | | - p1 | | | |f1 | | 0.1863 | | 0 | | - p2 | | | |f1 | | 0.1855 | | 0 | +| - p3 | | | |f1 | | 0.2001 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt index 1678af3e537d2be78af3049673d6f0fcd8541e12..6e3e1b2ef67ae845db75704f414dd97a01bc4d8a 100644 --- a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_si | - p1 | | | |f1 | | 0.5025 | | 0 | | - p2 | | | |f1 | | 0.5040 | | 0 | | - p3 | | | |f1 | | 0.5025 | | 0 | -| - RE | | | |f1 | | 0.1702 | |0 | +| - RE | | | |f1 | | 0.1832 | |0 | | - p1 | | | |f1 | | 0.1237 | | 0 | | - p2 | | | |f1 | | 0.2166 | | 0 | +| - p3 | | | |f1 | | 0.2094 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt index 09905f26ce5acd5fc32968382f2569524d922af4..c2b96898a18579d2b16376fa5e4d1159ed4fc544 100644 --- a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_si | - p1 | | | |f1 | | 0.5323 | | 0 | | - p2 | | | |f1 | | 0.5335 | | 0 | | - p3 | | | |f1 | | 0.5323 | | 0 | -| - RE | | | |f1 | | 0.1723 | |0 | +| - RE | | | |f1 | | 0.1725 | |0 | | - p1 | | | |f1 | | 0.1390 | | 0 | | - p2 | | | |f1 | | 0.2057 | | 0 | +| - p3 | | | |f1 | | 0.1727 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt index 116461a1d68f463761c7342d95e0a6c18d209b79..8852cb62bd0e04b214587916191c6b150f925661 100644 --- a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5840 | | 0 | | - p2 | | | |f1 | | 0.5421 | | 0 | | - p3 | | | |f1 | | 0.5928 | | 0 | -| - RE | | | |f1 | | 0.4960 | |0 | +| - RE | | | |f1 | | 0.5145 | |0 | | - p1 | | | |f1 | | 0.4335 | | 0 | | - p2 | | | |f1 | | 0.5586 | | 0 | +| - p3 | | | |f1 | | 0.5515 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt index 2c2e57b09937fbd9a661ea9eeea1d545a9014727..ba5c002a9264a1d56a51c72b4dc642ee87b8c605 100644 --- a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.3345 | | 0 | | - p2 | | | |f1 | | 0.3655 | | 0 | | - p3 | | | |f1 | | 0.3345 | | 0 | -| - RE | | | |f1 | | 0.3752 | |0 | +| - RE | | | |f1 | | 0.3591 | |0 | | - p1 | | | |f1 | | 0.3749 | | 0 | | - p2 | | | |f1 | | 0.3755 | | 0 | +| - p3 | | | |f1 | | 0.3268 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt index a8b908469a36074fdd599b214a6de21f33687ab3..e8b141d311893ba8c76e8ed7cce50f6f06752573 100644 --- a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5821 | | 0 | | - p2 | | | |f1 | | 0.5432 | | 0 | | - p3 | | | |f1 | | 0.5622 | | 0 | -| - RE | | | |f1 | | 0.5040 | |0 | +| - RE | | | |f1 | | 0.5226 | |0 | | - p1 | | | |f1 | | 0.4622 | | 0 | | - p2 | | | |f1 | | 0.5458 | | 0 | +| - p3 | | | |f1 | | 0.5597 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt index 8f83cc394deab4361dadca72e359dad9108165f1..068f9654a4427b28cd68c4493756660bf40e63a0 100644 --- a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4304 | | 0 | | - p2 | | | |f1 | | 0.4123 | | 0 | | - p3 | | | |f1 | | 0.4304 | | 0 | -| - RE | | | |f1 | | 0.5350 | |0 | +| - RE | | | |f1 | | 0.5396 | |0 | | - p1 | | | |f1 | | 0.5129 | | 0 | | - p2 | | | |f1 | | 0.5571 | | 0 | +| - p3 | | | |f1 | | 0.5489 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt index eba3a06478a8519e9938476f5f60a9ee4abfb883..dc5594c73257786e6edd1f8c852ad343d66e7f30 100644 --- a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4545 | | 0 | | - p2 | | | |f1 | | 0.4116 | | 0 | | - p3 | | | |f1 | | 0.4545 | | 0 | -| - RE | | | |f1 | | 0.4222 | |0 | +| - RE | | | |f1 | | 0.4261 | |0 | | - p1 | | | |f1 | | 0.3750 | | 0 | | - p2 | | | |f1 | | 0.4695 | | 0 | +| - p3 | | | |f1 | | 0.4338 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt index 9c812245ecae1e255c669705ae7b71ce3942868a..999a14510d1ea855adb9835bc9235c19f1a60783 100644 --- a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.4121 | | 0 | | - p2 | | | |f1 | | 0.3909 | | 0 | | - p3 | | | |f1 | | 0.4121 | | 0 | -| - RE | | | |f1 | | 0.2668 | |0 | +| - RE | | | |f1 | | 0.3133 | |0 | | - p1 | | | |f1 | | 0.2323 | | 0 | | - p2 | | | |f1 | | 0.3012 | | 0 | +| - p3 | | | |f1 | | 0.4063 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__en__10shot.txt b/csv_files/outputs/unsloth__phi-4__en__10shot.txt index 2f8a87e9012589d5d26d17645155c6504e9532cf..3f769aedc59b9b6be40222cca25d09daf6ffd0b3 100644 --- a/csv_files/outputs/unsloth__phi-4__en__10shot.txt +++ b/csv_files/outputs/unsloth__phi-4__en__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6098 | | 0 | | - p2 | | | |f1 | | 0.5711 | | 0 | | - p3 | | | |f1 | | 0.6141 | | 0 | -| - RE | | | |f1 | | 0.5269 | |0 | +| - RE | | | |f1 | | 0.5364 | |0 | | - p1 | | | |f1 | | 0.4912 | | 0 | | - p2 | | | |f1 | | 0.5626 | | 0 | +| - p3 | | | |f1 | | 0.5554 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__gr__10shot.txt b/csv_files/outputs/unsloth__phi-4__gr__10shot.txt index 7b2fb3a553fad01c0aab153c48d877c29231fe14..1f48b7d4235602d80223abd071f8764d2a1a5bfc 100644 --- a/csv_files/outputs/unsloth__phi-4__gr__10shot.txt +++ b/csv_files/outputs/unsloth__phi-4__gr__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5717 | | 0 | | - p2 | | | |f1 | | 0.5611 | | 0 | | - p3 | | | |f1 | | 0.5717 | | 0 | -| - RE | | | |f1 | | 0.5098 | |0 | +| - RE | | | |f1 | | 0.5291 | |0 | | - p1 | | | |f1 | | 0.4935 | | 0 | | - p2 | | | |f1 | | 0.5261 | | 0 | +| - p3 | | | |f1 | | 0.5678 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__it__10shot.txt b/csv_files/outputs/unsloth__phi-4__it__10shot.txt index f8334a6b52904612c998a336649a7d71a2d1193c..be3c82925982b576686dd2db835c6bbc58fc89f7 100644 --- a/csv_files/outputs/unsloth__phi-4__it__10shot.txt +++ b/csv_files/outputs/unsloth__phi-4__it__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.6647 | | 0 | | - p2 | | | |f1 | | 0.6732 | | 0 | | - p3 | | | |f1 | | 0.6897 | | 0 | -| - RE | | | |f1 | | 0.5714 | |0 | +| - RE | | | |f1 | | 0.5705 | |0 | | - p1 | | | |f1 | | 0.5608 | | 0 | | - p2 | | | |f1 | | 0.5820 | | 0 | +| - p3 | | | |f1 | | 0.5688 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__pl__10shot.txt b/csv_files/outputs/unsloth__phi-4__pl__10shot.txt index 4ea29a3036075f0246a37b515995fe97a4d96e05..13c70462fcbbc4333d7e40ab047995e60782311c 100644 --- a/csv_files/outputs/unsloth__phi-4__pl__10shot.txt +++ b/csv_files/outputs/unsloth__phi-4__pl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5549 | | 0 | | - p2 | | | |f1 | | 0.5324 | | 0 | | - p3 | | | |f1 | | 0.5549 | | 0 | -| - RE | | | |f1 | | 0.5591 | |0 | +| - RE | | | |f1 | | 0.5718 | |0 | | - p1 | | | |f1 | | 0.5423 | | 0 | | - p2 | | | |f1 | | 0.5760 | | 0 | +| - p3 | | | |f1 | | 0.5972 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__sk__10shot.txt b/csv_files/outputs/unsloth__phi-4__sk__10shot.txt index 925b775dfbe86c7320abed9213c1b3d54ef99b36..e55439f603a7ee43ebc4fb2b6489d94a69f17b05 100644 --- a/csv_files/outputs/unsloth__phi-4__sk__10shot.txt +++ b/csv_files/outputs/unsloth__phi-4__sk__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5561 | | 0 | | - p2 | | | |f1 | | 0.5449 | | 0 | | - p3 | | | |f1 | | 0.5561 | | 0 | -| - RE | | | |f1 | | 0.5050 | |0 | +| - RE | | | |f1 | | 0.5214 | |0 | | - p1 | | | |f1 | | 0.5106 | | 0 | | - p2 | | | |f1 | | 0.4994 | | 0 | +| - p3 | | | |f1 | | 0.5541 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__sl__10shot.txt b/csv_files/outputs/unsloth__phi-4__sl__10shot.txt index f54207bae749b676ad9b3ce437dcbc6e2780d019..debd951319f9e20f02aade8491ff82efa207384f 100644 --- a/csv_files/outputs/unsloth__phi-4__sl__10shot.txt +++ b/csv_files/outputs/unsloth__phi-4__sl__10shot.txt @@ -5,6 +5,7 @@ hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 | - p1 | | | |f1 | | 0.5586 | | 0 | | - p2 | | | |f1 | | 0.5558 | | 0 | | - p3 | | | |f1 | | 0.5586 | | 0 | -| - RE | | | |f1 | | 0.5175 | |0 | +| - RE | | | |f1 | | 0.5309 | |0 | | - p1 | | | |f1 | | 0.5117 | | 0 | | - p2 | | | |f1 | | 0.5232 | | 0 | +| - p3 | | | |f1 | | 0.5579 | | 0 | diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json index ea2480978d039f1b1f7e74c77f0e125646b09c10..6a5dcb9b2dbb21591b581cce2196c81f871307b8 100644 --- a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 19.2419855, + "average_CPS": 21.475744333333335, "config": { "model_name": "Henrychur/MMed-Llama-3-8B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 16.68, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 21.85, + "stderr": 0.0 } ], - "average_accuracy": 14.285, - "best_prompt": 16.68, - "prompt_id": "p2", - "CPS": 16.280514 + "average_accuracy": 16.80666666666667, + "best_prompt": 21.85, + "prompt_id": "p3", + "CPS": 20.74803166666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json index 798fc19f73b5d02b86780d34c25500074a98f950..1f8bd8998ccd6876a0227a2fa7e9ddebcfdae73c 100644 --- a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 13.378338333333332, + "average_CPS": 13.395712833333334, "config": { "model_name": "Henrychur/MMed-Llama-3-8B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 10.530000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 10.36, + "stderr": 0.0 } ], - "average_accuracy": 9.370000000000001, + "average_accuracy": 9.700000000000001, "best_prompt": 10.530000000000001, "prompt_id": "p2", - "CPS": 10.407852 + "CPS": 10.442601000000002 } } } \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json index 5710e85897fdd42598eecf823cf2d276238e0e9d..da2c2fd177cb5d790551318e999fa1074ce03aad 100644 --- a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 25.626247, + "average_CPS": 28.10758633333333, "config": { "model_name": "Henrychur/MMed-Llama-3-8B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 12.26, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.89, + "stderr": 0.0 } ], - "average_accuracy": 11.015, - "best_prompt": 12.26, - "prompt_id": "p2", - "CPS": 12.107363 + "average_accuracy": 13.306666666666667, + "best_prompt": 17.89, + "prompt_id": "p3", + "CPS": 17.070041666666665 } } } \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json index 5525632c7e620b509384edeacb8f3270a50bf804..40f10d5de954c6c63527e4da5a86c6b5bc87fb0b 100644 --- a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 25.169400916666667, + "average_CPS": 25.157004666666666, "config": { "model_name": "Henrychur/MMed-Llama-3-8B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 10.549999999999999, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.56, + "stderr": 0.0 } ], - "average_accuracy": 10.265, + "average_accuracy": 10.03, "best_prompt": 10.549999999999999, "prompt_id": "p2", - "CPS": 10.5199325 + "CPS": 10.49514 } } } \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json index fcec849e5324f8bdfd26768368482acde7395f7f..67253c8db4db743459db1f7a577231fcf3efbea7 100644 --- a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 23.07614591666667, + "average_CPS": 23.0736205, "config": { "model_name": "Henrychur/MMed-Llama-3-8B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 10.45, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.75, + "stderr": 0.0 } ], - "average_accuracy": 8.895, + "average_accuracy": 8.846666666666666, "best_prompt": 10.45, "prompt_id": "p2", - "CPS": 10.2875025 + "CPS": 10.282451666666665 } } } \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json index 4c1ebdcce253b5b57c4465de0e0584621709931d..dea3d7d1144e157500dfe519f09d449c46cf8c3e 100644 --- a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 23.50218116666667, + "average_CPS": 23.493655333333336, "config": { "model_name": "Henrychur/MMed-Llama-3-8B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 7.8100000000000005, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.19, + "stderr": 0.0 } ], - "average_accuracy": 7.840000000000001, + "average_accuracy": 7.623333333333334, "best_prompt": 7.870000000000001, "prompt_id": "p1", - "CPS": 7.867639000000001 + "CPS": 7.850587333333335 } } } \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json index a988bb43f6ab23ba3d1f9d6292c70a43460fb0d4..83d44663ba140bd7986281e05f24ae517e5fec0f 100644 --- a/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 7.145816833333333, + "average_CPS": 7.250459833333332, "config": { "model_name": "HiTZ/Medical-mT5-large", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 0.16, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.49, + "stderr": 0.0 } ], - "average_accuracy": 0.21999999999999997, - "best_prompt": 0.27999999999999997, - "prompt_id": "p1", - "CPS": 0.27983199999999997 + "average_accuracy": 0.31, + "best_prompt": 0.49, + "prompt_id": "p3", + "CPS": 0.489118 } } } \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json index 5076f03e4d6bee03c90dce600a468e881de065c0..d7cbc234177483f246aa7222fe098ffd9e9050c6 100644 --- a/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 7.3898055, + "average_CPS": 7.3897435, "config": { "model_name": "HiTZ/Medical-mT5-large", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 0.06999999999999999, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 } ], - "average_accuracy": 0.155, + "average_accuracy": 0.10333333333333333, "best_prompt": 0.24, "prompt_id": "p1", - "CPS": 0.23979599999999998 + "CPS": 0.239672 } } } \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json index 7d68a6f7a4a7f35a89a07d7310f58dc81d6d64fa..027c6bcc93f99fe26efb19434f44ce249e42b32e 100644 --- a/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 9.117984666666667, + "average_CPS": 9.117947333333333, "config": { "model_name": "HiTZ/Medical-mT5-large", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 0.64, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.45999999999999996, + "stderr": 0.0 } ], - "average_accuracy": 0.495, + "average_accuracy": 0.48333333333333334, "best_prompt": 0.64, "prompt_id": "p2", - "CPS": 0.6390720000000001 + "CPS": 0.6389973333333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json index 3fcb85a483bf3fd0e8debe59378c68608fe22ea3..87c60c0b6167aed67d83e7f074584fa26062c5ce 100644 --- a/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 7.915062, + "average_CPS": 7.915078666666666, "config": { "model_name": "HiTZ/Medical-mT5-large", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 0.22999999999999998, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.33999999999999997, + "stderr": 0.0 } ], - "average_accuracy": 0.315, + "average_accuracy": 0.3233333333333333, "best_prompt": 0.4, "prompt_id": "p1", - "CPS": 0.39966 + "CPS": 0.39969333333333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json index 0787b10790b37d461479e23937a23510088be9a9..6bd5f3d46bc83aaa152ed3a3044713e317ef066b 100644 --- a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 7.5839295, + "average_CPS": 7.5838598333333325, "config": { "model_name": "HiTZ/Medical-mT5-large", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 0.24, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.2, + "stderr": 0.0 } ], - "average_accuracy": 0.31, + "average_accuracy": 0.2733333333333334, "best_prompt": 0.38, "prompt_id": "p1", - "CPS": 0.379734 + "CPS": 0.3795946666666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json index d17709a39eeba2feaacfe2e95ddbe4ef27a273db..c0ba959636d77a87783090bc8e67b69922d3e818 100644 --- a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 7.6844565, + "average_CPS": 7.7788705, "config": { "model_name": "HiTZ/Medical-mT5-large", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 0.74, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.9299999999999999, + "stderr": 0.0 } ], - "average_accuracy": 0.735, - "best_prompt": 0.74, - "prompt_id": "p2", - "CPS": 0.739963 + "average_accuracy": 0.7999999999999999, + "best_prompt": 0.9299999999999999, + "prompt_id": "p3", + "CPS": 0.9287909999999999 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json index 688cbe3f4c7da32a0a536c318924992e30dad2f5..ad2e64f692310e59d4979f2769f0728866ed8d23 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 61.6096845, + "average_CPS": 61.561666833333334, "config": { "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 60.25, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.33, + "stderr": 0.0 } ], - "average_accuracy": 61.785, + "average_accuracy": 61.63333333333333, "best_prompt": 63.32, "prompt_id": "p1", - "CPS": 62.348037999999995 + "CPS": 62.25200266666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json index d5cfc474ac076f71e5220048db98a00b6aefcbcf..e09277e8cb3a41fed6acfe7d67ee311564df77c1 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 60.344233333333335, + "average_CPS": 60.84101533333333, "config": { "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 60.24, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.83, + "stderr": 0.0 } ], - "average_accuracy": 59.93, - "best_prompt": 60.24, - "prompt_id": "p2", - "CPS": 60.053256000000005 + "average_accuracy": 60.56333333333333, + "best_prompt": 61.83, + "prompt_id": "p3", + "CPS": 61.046820000000004 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json index 70df8ef1bd82faacadceec318971c7b386d52768..612f3d823ed5c8e68b855df3a6a4d42fcfb3273b 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.7346905, + "average_CPS": 63.1285495, "config": { "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 59.98, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.92999999999999, + "stderr": 0.0 } ], - "average_accuracy": 58.825, - "best_prompt": 59.98, - "prompt_id": "p2", - "CPS": 59.287231 + "average_accuracy": 59.526666666666664, + "best_prompt": 60.92999999999999, + "prompt_id": "p3", + "CPS": 60.07494899999999 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json index 88a28b36f85352de3ec8799000a7d048b4af6678..302d2bd2ae5f0e22e7b2e4592d0e6fbc37b4880f 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 59.88153925, + "average_CPS": 61.214045500000005, "config": { "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 54.66, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 63.74999999999999, + "stderr": 0.0 } ], - "average_accuracy": 57.285, - "best_prompt": 59.91, - "prompt_id": "p1", - "CPS": 58.3373625 + "average_accuracy": 59.44, + "best_prompt": 63.74999999999999, + "prompt_id": "p3", + "CPS": 61.002375 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json index c14348cbc2d630c7b923f2d562341c048402cade..6babcf1d0705f4e5a54e486e17e2fbcdba611556 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 61.6115985, + "average_CPS": 61.83102316666667, "config": { "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.45, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.589999999999996, + "stderr": 0.0 } ], - "average_accuracy": 58.69500000000001, - "best_prompt": 58.940000000000005, - "prompt_id": "p1", - "CPS": 58.79559700000001 + "average_accuracy": 58.99333333333334, + "best_prompt": 59.589999999999996, + "prompt_id": "p3", + "CPS": 59.23444633333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json index 2e8e49b9bc1e68464243b969e94f3885925d0a06..cc6a741c3b3abf073543a4dc67faf5ebfd74131b 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 61.52013541666667, + "average_CPS": 61.436353666666676, "config": { "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 57.82000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.809999999999995, + "stderr": 0.0 } ], - "average_accuracy": 58.655, + "average_accuracy": 58.373333333333335, "best_prompt": 59.489999999999995, "prompt_id": "p1", - "CPS": 58.993258499999996 + "CPS": 58.825695 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json index 7f7c5ffc1ea40ee3e42fa6e73c3000e65da3d4c0..548c90450e3b193ae6ba7a5a99aadb0e0d8bcbfb 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.2876165, + "average_CPS": 62.17364133333332, "config": { "model_name": "Qwen/Qwen2.5-32B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 64.69, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 63.7, + "stderr": 0.0 } ], - "average_accuracy": 64.755, + "average_accuracy": 64.40333333333332, "best_prompt": 64.82, "prompt_id": "p1", - "CPS": 64.777867 + "CPS": 64.54991666666665 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json index 0bae92ff5e8b97348c151dcb68efa3203adf0606..f864a1f533f405bf28a1bef9dab915b09c39db9b 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 60.45274641666667, + "average_CPS": 60.26106666666667, "config": { "model_name": "Qwen/Qwen2.5-32B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.96, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.099999999999994, + "stderr": 0.0 } ], - "average_accuracy": 59.045, + "average_accuracy": 58.39666666666667, "best_prompt": 59.13, "prompt_id": "p1", - "CPS": 59.0797395 + "CPS": 58.696380000000005 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json index 2fa19d8261fb781a23a09ec8a146d9dd282d3a9b..93839da32602f4fb71dad6858a4a71f9b91948d7 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 63.941768499999995, + "average_CPS": 63.77547316666666, "config": { "model_name": "Qwen/Qwen2.5-32B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.95, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.26, + "stderr": 0.0 } ], - "average_accuracy": 56.980000000000004, + "average_accuracy": 56.406666666666666, "best_prompt": 58.01, "prompt_id": "p1", - "CPS": 57.412497 + "CPS": 57.079906333333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json index 863d7b50c8936944694e079960c0346a0ddb96d9..fcf4994e83b99b9cdcf2823f96a46f364eacf4ad 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 59.36132466666666, + "average_CPS": 59.57754466666667, "config": { "model_name": "Qwen/Qwen2.5-32B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.68, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.38, + "stderr": 0.0 } ], - "average_accuracy": 58.629999999999995, - "best_prompt": 58.68, - "prompt_id": "p2", - "CPS": 58.650659999999995 + "average_accuracy": 58.879999999999995, + "best_prompt": 59.38, + "prompt_id": "p3", + "CPS": 59.083099999999995 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json index 2c3c383fea4b2e089c94d8512372e94d7b91f0f2..bff727b6119c1b6cc67c5cbcd8fb8780d195f344 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.09064391666667, + "average_CPS": 62.042391166666675, "config": { "model_name": "Qwen/Qwen2.5-32B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.86, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 56.089999999999996, + "stderr": 0.0 } ], - "average_accuracy": 56.595, + "average_accuracy": 56.42666666666667, "best_prompt": 57.330000000000005, "prompt_id": "p1", - "CPS": 56.9086245 + "CPS": 56.812119 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json index 00d6d1cdc907776f6d7f2a609186251e46c95ad5..02036f3a397a3c55c02e5f35f718065b9b393a2e 100644 --- a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.022576, + "average_CPS": 61.441851333333325, "config": { "model_name": "Qwen/Qwen2.5-32B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.489999999999995, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.39, + "stderr": 0.0 } ], - "average_accuracy": 59.205, + "average_accuracy": 57.26666666666666, "best_prompt": 59.919999999999995, "prompt_id": "p1", - "CPS": 59.491572 + "CPS": 58.330122666666654 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json index 5de18fbfc9ec4cdc6d944ec47a354715d45711d9..66ab4cf4698db844a85ef026fc1a8a70a24f8376 100644 --- a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 56.15015708333333, + "average_CPS": 56.52775183333333, "config": { "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 52.61, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.64, + "stderr": 0.0 } ], - "average_accuracy": 52.055, - "best_prompt": 52.61, - "prompt_id": "p2", - "CPS": 52.318014500000004 + "average_accuracy": 52.583333333333336, + "best_prompt": 53.64, + "prompt_id": "p3", + "CPS": 53.073204000000004 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json index 10d2d191709b8ef5a7f7f711c97b894c42c9d2f9..c9cf96c34cbea8c56f5cfa4a717dead09459b61a 100644 --- a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 56.10383349999999, + "average_CPS": 56.23321899999999, "config": { "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 52.09, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 52.23, + "stderr": 0.0 } ], - "average_accuracy": 51.12, - "best_prompt": 52.09, - "prompt_id": "p2", - "CPS": 51.584727 + "average_accuracy": 51.49, + "best_prompt": 52.23, + "prompt_id": "p3", + "CPS": 51.843498 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json index ffa6670eee1e4c959942239ae04e9461debaa43f..69395b5a37635269966540d3fa71a8d2235d41fa 100644 --- a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 63.45470708333333, + "average_CPS": 63.62756866666667, "config": { "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.379999999999995, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.650000000000006, + "stderr": 0.0 } ], - "average_accuracy": 59.394999999999996, - "best_prompt": 60.41, - "prompt_id": "p1", - "CPS": 59.7968385 + "average_accuracy": 59.81333333333333, + "best_prompt": 60.650000000000006, + "prompt_id": "p3", + "CPS": 60.142561666666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json index a0cc99350012b603cfd7d9fd7180b322f7753354..da5844ca39a76b5c2025b5fb76ba16c2ac95517c 100644 --- a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 56.759142, + "average_CPS": 56.796842, "config": { "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 52.0, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.949999999999996, + "stderr": 0.0 } ], - "average_accuracy": 51.515, + "average_accuracy": 51.66, "best_prompt": 52.0, "prompt_id": "p2", - "CPS": 51.7478 + "CPS": 51.8232 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json index c74cdba150a3653cec062b879731ac8ad733f12b..5c4ac821413c34ab2455dd7a927cbca44faf8e39 100644 --- a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.24974208333333, + "average_CPS": 56.404352833333334, "config": { "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 50.24999999999999, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.73, + "stderr": 0.0 } ], - "average_accuracy": 49.724999999999994, - "best_prompt": 50.24999999999999, - "prompt_id": "p2", - "CPS": 49.98618749999999 + "average_accuracy": 51.06, + "best_prompt": 53.73, + "prompt_id": "p3", + "CPS": 52.295409 } } } \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json index 73a7e15324e9b247003bd8046f28a26cadb57b53..f6e80a3a2fbd0ad95707bba1d29834ed15eda75e 100644 --- a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 59.49870649999999, + "average_CPS": 59.600933166666664, "config": { "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.76, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 54.290000000000006, + "stderr": 0.0 } ], - "average_accuracy": 53.19, + "average_accuracy": 53.55666666666667, "best_prompt": 55.76, "prompt_id": "p2", - "CPS": 54.326967999999994 + "CPS": 54.531421333333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_EN-checkpoint.json b/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_EN-checkpoint.json deleted file mode 100644 index 9443fb54d23a6e830e20452f33cc1ccc3dbede21..0000000000000000000000000000000000000000 --- a/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_EN-checkpoint.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "average_CPS": 38.6086025, - "config": { - "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "num_fewshot": "0", - "batch_size": 1, - "LANG": "EN", - "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "base_model": "Qwen2ForCausalLM", - "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", - "submitted_time": "2025-01-20 09:19:00+00:00", - "num_params_billion": 32.763876352, - "language": "" - }, - "tasks": { - "NER": { - "prompts": [ - { - "prompt": "p1", - "metric": "f1", - "value": 19.63, - "stderr": 0.0 - }, - { - "prompt": "p2", - "metric": "f1", - "value": 34.589999999999996, - "stderr": 0.0 - }, - { - "prompt": "p3", - "metric": "f1", - "value": 32.08, - "stderr": 0.0 - } - ], - "average_accuracy": 28.766666666666666, - "best_prompt": 34.589999999999996, - "prompt_id": "p2", - "CPS": 32.575708999999996 - }, - "RE": { - "prompts": [ - { - "prompt": "p1", - "metric": "f1", - "value": 44.87, - "stderr": 0.0 - }, - { - "prompt": "p2", - "metric": "f1", - "value": 44.92, - "stderr": 0.0 - }, - { - "prompt": "p3", - "metric": "f1", - "value": 43.11, - "stderr": 0.0 - } - ], - "average_accuracy": 44.300000000000004, - "best_prompt": 44.92, - "prompt_id": "p2", - "CPS": 44.641496000000004 - } - } -} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json index 2333becdd494f34a1cc9ea218685238e180c08ae..696e89129e5dd6e0370d3d2d9bc4b04394b43ac8 100644 --- a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.919866, + "average_CPS": 56.163166000000004, "config": { "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 51.99, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 52.73, + "stderr": 0.0 } ], - "average_accuracy": 51.95, - "best_prompt": 51.99, - "prompt_id": "p2", - "CPS": 51.969204000000005 + "average_accuracy": 52.21, + "best_prompt": 52.73, + "prompt_id": "p3", + "CPS": 52.455804 } } } \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json index 7f68ed42d5b6dc6a22dca3c2c26dfd0afc13a196..dc934142fa8f6ae05bf7b9037604dfca268b6b8e 100644 --- a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 51.55757925, + "average_CPS": 52.0035325, "config": { "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 42.1, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 45.69, + "stderr": 0.0 } ], - "average_accuracy": 43.385000000000005, - "best_prompt": 44.67, - "prompt_id": "p1", - "CPS": 44.095990500000006 + "average_accuracy": 44.153333333333336, + "best_prompt": 45.69, + "prompt_id": "p3", + "CPS": 44.987897 } } } \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json index d469a75368b2a518562ccc911a0a957a722cd8b4..2da72f0769a9db34459312397de684c3b6a0326c 100644 --- a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.19916833333333, + "average_CPS": 62.18253033333333, "config": { "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.26, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.17999999999999, + "stderr": 0.0 } ], - "average_accuracy": 55.36, + "average_accuracy": 55.29999999999999, "best_prompt": 55.46, "prompt_id": "p1", - "CPS": 55.404540000000004 + "CPS": 55.37126399999999 } } } \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json index c0781d3cf163160677e909dfa5d081124e3855d8..005df5057a6c562af58001c2d2df443d7ad25fde 100644 --- a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 56.29728216666666, + "average_CPS": 56.36566883333333, "config": { "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 51.29, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 50.760000000000005, + "stderr": 0.0 } ], - "average_accuracy": 49.96, + "average_accuracy": 50.22666666666667, "best_prompt": 51.29, "prompt_id": "p2", - "CPS": 50.607843 + "CPS": 50.74461633333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json index 68b7731551528182c86059495e8503c1ec21ceb5..89f66951b3703ddcd6b456073bcb139515aea658 100644 --- a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.168621666666674, + "average_CPS": 55.21981100000001, "config": { "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 44.51, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 46.89, + "stderr": 0.0 } ], - "average_accuracy": 46.25, + "average_accuracy": 46.46333333333333, "best_prompt": 47.99, "prompt_id": "p1", - "CPS": 47.154974 + "CPS": 47.25735266666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json index fbb0ba642dd08cde680dad6026daa9777865b931..cdfe1ae096d22d5a9a8912f4dbf8f7094c659b74 100644 --- a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.26683691666667, + "average_CPS": 55.28181983333334, "config": { "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 46.739999999999995, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 49.230000000000004, + "stderr": 0.0 } ], - "average_accuracy": 49.055, + "average_accuracy": 49.11333333333334, "best_prompt": 51.370000000000005, "prompt_id": "p1", - "CPS": 50.1807845 + "CPS": 50.21075033333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json b/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json index 434a5329b12aece570f1653c203c6d90d132951b..34e9f8006e74510a3b49e80b95c96b5d8c8c1896 100644 --- a/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 10.826788500000001, + "average_CPS": 10.821493833333335, "config": { "model_name": "epfl-llm/meditron-7b", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 6.92, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 6.63, + "stderr": 0.0 } ], - "average_accuracy": 7.07, + "average_accuracy": 6.923333333333333, "best_prompt": 7.22, "prompt_id": "p1", - "CPS": 7.20917 + "CPS": 7.1985806666666665 } } } \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json b/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json index 6baca6a43c7ffb81f47740fc6aab100082d5b1e2..f4e964940f8370c4443037e6fc5a553f27de8810 100644 --- a/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json @@ -52,6 +52,12 @@ "metric": "f1", "value": 0.0, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 } ], "average_accuracy": 0.0, diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json b/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json index 7031881141abb2a27198e9213d39c3dfd04074cb..244cb48a61f03eb4ab7bb9983b4eff7e62f2398f 100644 --- a/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 21.748485083333332, + "average_CPS": 22.130671999999997, "config": { "model_name": "epfl-llm/meditron-7b", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 8.870000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.68, + "stderr": 0.0 } ], - "average_accuracy": 8.595, - "best_prompt": 8.870000000000001, - "prompt_id": "p2", - "CPS": 8.8456075 + "average_accuracy": 8.956666666666667, + "best_prompt": 9.68, + "prompt_id": "p3", + "CPS": 9.609981333333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json b/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json index 6d10f8a603bb645cb8f7878e792ae3f4f5f3b330..09d598b30fd86852c02b549323a6e7a0aea76051 100644 --- a/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 19.016219, + "average_CPS": 19.029036333333334, "config": { "model_name": "epfl-llm/meditron-7b", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 4.61, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 5.35, + "stderr": 0.0 } ], - "average_accuracy": 4.970000000000001, - "best_prompt": 5.33, - "prompt_id": "p1", - "CPS": 5.310811999999999 + "average_accuracy": 5.096666666666667, + "best_prompt": 5.35, + "prompt_id": "p3", + "CPS": 5.336446666666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json b/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json index f863ae30b498c369fe5313b55c180baed455017a..cd967d4b173404407d135310fdf5de55bc864653 100644 --- a/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 17.222192333333332, + "average_CPS": 17.218929, "config": { "model_name": "epfl-llm/meditron-7b", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 3.93, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 3.75, + "stderr": 0.0 } ], - "average_accuracy": 4.19, + "average_accuracy": 4.043333333333334, "best_prompt": 4.45, "prompt_id": "p1", - "CPS": 4.43843 + "CPS": 4.4319033333333335 } } } \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json b/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json index cb031f581ed64e83aa508b1434f11d9477dfec7a..ec1bf7f96297f1f25a32b81c46c2521199f022f2 100644 --- a/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 17.99146783333333, + "average_CPS": 18.122609833333332, "config": { "model_name": "epfl-llm/meditron-7b", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 5.01, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 5.28, + "stderr": 0.0 } ], - "average_accuracy": 4.89, - "best_prompt": 5.01, - "prompt_id": "p2", - "CPS": 5.003988 + "average_accuracy": 5.02, + "best_prompt": 5.28, + "prompt_id": "p3", + "CPS": 5.266272 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_EN.json b/e3c_llm_results/google/gemma-2-9b-it_10_EN.json index cc2ad75233b19512ad0e9566419d36068a722532..82bee9cdb274254c7284f05a5ae03969262a3588 100644 --- a/e3c_llm_results/google/gemma-2-9b-it_10_EN.json +++ b/e3c_llm_results/google/gemma-2-9b-it_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 56.581740499999995, + "average_CPS": 56.887223, "config": { "model_name": "google/gemma-2-9b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 53.37, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 54.09, + "stderr": 0.0 } ], - "average_accuracy": 52.5, - "best_prompt": 53.37, - "prompt_id": "p2", - "CPS": 52.905681 + "average_accuracy": 53.03, + "best_prompt": 54.09, + "prompt_id": "p3", + "CPS": 53.516646 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_GR.json b/e3c_llm_results/google/gemma-2-9b-it_10_GR.json index 0bba7406aa35200f7ea3dc465af5fde82f3e4925..0e2098f70ae078b548e5f599d56bc7e7e39b083d 100644 --- a/e3c_llm_results/google/gemma-2-9b-it_10_GR.json +++ b/e3c_llm_results/google/gemma-2-9b-it_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.2137075, + "average_CPS": 56.44067866666666, "config": { "model_name": "google/gemma-2-9b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 49.71, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 54.44, + "stderr": 0.0 } ], - "average_accuracy": 50.205, - "best_prompt": 50.7, - "prompt_id": "p1", - "CPS": 50.449035 + "average_accuracy": 51.61666666666667, + "best_prompt": 54.44, + "prompt_id": "p3", + "CPS": 52.90297733333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_IT.json b/e3c_llm_results/google/gemma-2-9b-it_10_IT.json index 5a0cdb8b6d62f4718c31f660903e246d14effa82..64051e3c760740b47f74e2fc169fed984b21a83c 100644 --- a/e3c_llm_results/google/gemma-2-9b-it_10_IT.json +++ b/e3c_llm_results/google/gemma-2-9b-it_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 60.12889791666666, + "average_CPS": 60.25721083333333, "config": { "model_name": "google/gemma-2-9b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 53.65, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.05, + "stderr": 0.0 } ], - "average_accuracy": 51.614999999999995, + "average_accuracy": 52.093333333333334, "best_prompt": 53.65, "prompt_id": "p2", - "CPS": 52.55822249999999 + "CPS": 52.81484833333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_PL.json b/e3c_llm_results/google/gemma-2-9b-it_10_PL.json index bf494b9a6cb7e1aa37d080b22c9228d6192d8a6e..f90e29dbb87b8f82e390fa684d0c6094d0f9a559 100644 --- a/e3c_llm_results/google/gemma-2-9b-it_10_PL.json +++ b/e3c_llm_results/google/gemma-2-9b-it_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 54.86958533333334, + "average_CPS": 54.98672666666667, "config": { "model_name": "google/gemma-2-9b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 48.08, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.239999999999995, + "stderr": 0.0 } ], - "average_accuracy": 49.88, + "average_accuracy": 50.333333333333336, "best_prompt": 51.68000000000001, "prompt_id": "p1", - "CPS": 50.74976000000001 + "CPS": 50.98404266666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_SK.json b/e3c_llm_results/google/gemma-2-9b-it_10_SK.json index aa0fae50b3fef12bdfedd530ed4b7fcaf9e8fbb6..40670e1d69af82c8078e42c74eba38fd62ee83b9 100644 --- a/e3c_llm_results/google/gemma-2-9b-it_10_SK.json +++ b/e3c_llm_results/google/gemma-2-9b-it_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.936541749999996, + "average_CPS": 56.074384499999994, "config": { "model_name": "google/gemma-2-9b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 47.54, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.13999999999999, + "stderr": 0.0 } ], - "average_accuracy": 49.535, + "average_accuracy": 50.06999999999999, "best_prompt": 51.53, "prompt_id": "p1", - "CPS": 50.5019765 + "CPS": 50.777662 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_SL.json b/e3c_llm_results/google/gemma-2-9b-it_10_SL.json index a3f3562b3e48f6decc3ba27495b4e0ca19f77d6e..1d426183767a1511dcbef244831d32a4b3a2e57d 100644 --- a/e3c_llm_results/google/gemma-2-9b-it_10_SL.json +++ b/e3c_llm_results/google/gemma-2-9b-it_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.45489516666666, + "average_CPS": 55.7992, "config": { "model_name": "google/gemma-2-9b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 48.78, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 49.72, + "stderr": 0.0 } ], - "average_accuracy": 48.395, - "best_prompt": 48.78, - "prompt_id": "p2", - "CPS": 48.592197 + "average_accuracy": 48.836666666666666, + "best_prompt": 49.72, + "prompt_id": "p3", + "CPS": 49.28080666666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_EN.json b/e3c_llm_results/google/gemma-3-27b-it_10_EN.json index 39851139187b27808e4d7f952fb8d85d50a5e5c7..903c6cb300673438b7555124cb15fe2b7e0e795f 100644 --- a/e3c_llm_results/google/gemma-3-27b-it_10_EN.json +++ b/e3c_llm_results/google/gemma-3-27b-it_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 58.58681733333334, + "average_CPS": 59.271406000000006, "config": { "model_name": "google/gemma-3-27b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 56.00000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.64, + "stderr": 0.0 } ], - "average_accuracy": 53.955000000000005, - "best_prompt": 56.00000000000001, - "prompt_id": "p2", - "CPS": 54.85480000000001 + "average_accuracy": 55.18333333333334, + "best_prompt": 57.64, + "prompt_id": "p3", + "CPS": 56.22397733333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_GR.json b/e3c_llm_results/google/gemma-3-27b-it_10_GR.json index a6109ef1b3e6581047d1257034f65ac2c9c1866f..fe1b22dab57d0df99b5b5323e5b5fda6b4bcdbbc 100644 --- a/e3c_llm_results/google/gemma-3-27b-it_10_GR.json +++ b/e3c_llm_results/google/gemma-3-27b-it_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 60.0164855, + "average_CPS": 60.327389833333335, "config": { "model_name": "google/gemma-3-27b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.50000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.81, + "stderr": 0.0 } ], - "average_accuracy": 53.165000000000006, - "best_prompt": 55.50000000000001, - "prompt_id": "p2", - "CPS": 54.20407500000001 + "average_accuracy": 54.046666666666674, + "best_prompt": 55.81, + "prompt_id": "p3", + "CPS": 54.82588366666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_IT.json b/e3c_llm_results/google/gemma-3-27b-it_10_IT.json index 72bd21009a82814bac749f05afecd68b1a1ce129..1bc2570adab2f3f2d327d8254b5bd03aa0a0f0de 100644 --- a/e3c_llm_results/google/gemma-3-27b-it_10_IT.json +++ b/e3c_llm_results/google/gemma-3-27b-it_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 64.0004405, + "average_CPS": 64.24948583333332, "config": { "model_name": "google/gemma-3-27b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.37, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.86, + "stderr": 0.0 } ], - "average_accuracy": 55.3, + "average_accuracy": 56.15333333333333, "best_prompt": 58.37, "prompt_id": "p2", - "CPS": 56.578041 + "CPS": 57.07613166666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_PL.json b/e3c_llm_results/google/gemma-3-27b-it_10_PL.json index 9a90d98c694bde491a52c642376e96ffc7b4aa27..8de0837499b10ebd8ec72ea7bc81beaa038d1983 100644 --- a/e3c_llm_results/google/gemma-3-27b-it_10_PL.json +++ b/e3c_llm_results/google/gemma-3-27b-it_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 61.87379849999999, + "average_CPS": 61.5666635, "config": { "model_name": "google/gemma-3-27b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 56.010000000000005, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.800000000000004, + "stderr": 0.0 } ], - "average_accuracy": 56.980000000000004, + "average_accuracy": 55.92000000000001, "best_prompt": 57.95, "prompt_id": "p1", - "CPS": 57.387885 + "CPS": 56.77361500000001 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_SK.json b/e3c_llm_results/google/gemma-3-27b-it_10_SK.json index 49c3469167d1730e3a4b64251d858dbb86bac7f2..cc28cf5961280cd567f4368f68a587617b7be08c 100644 --- a/e3c_llm_results/google/gemma-3-27b-it_10_SK.json +++ b/e3c_llm_results/google/gemma-3-27b-it_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 59.613524999999996, + "average_CPS": 59.623766999999994, "config": { "model_name": "google/gemma-3-27b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 50.61, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.03, + "stderr": 0.0 } ], - "average_accuracy": 50.91, + "average_accuracy": 50.949999999999996, "best_prompt": 51.21, "prompt_id": "p1", - "CPS": 51.05637 + "CPS": 51.076854 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_SL.json b/e3c_llm_results/google/gemma-3-27b-it_10_SL.json index 1a4bf277bd953b6dbafae6009a4de844f3c955ae..37a77ebd450c4aabb1937362c0020ecdc0c132dc 100644 --- a/e3c_llm_results/google/gemma-3-27b-it_10_SL.json +++ b/e3c_llm_results/google/gemma-3-27b-it_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 59.373478500000004, + "average_CPS": 59.561417000000006, "config": { "model_name": "google/gemma-3-27b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 47.03, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.449999999999996, + "stderr": 0.0 } ], - "average_accuracy": 49.260000000000005, + "average_accuracy": 49.99, "best_prompt": 51.49, "prompt_id": "p1", - "CPS": 50.341773 + "CPS": 50.71765 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json b/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json index 8f37ddd9d98878decba3c075bd569bdddd6e8706..f17f4bcfaa89a0ccec0e26eb196cc71f46930348 100644 --- a/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 59.56656766666666, + "average_CPS": 59.60748666666666, "config": { "model_name": "google/medgemma-27b-text-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 54.94, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.65, + "stderr": 0.0 } ], - "average_accuracy": 55.28, - "best_prompt": 55.620000000000005, - "prompt_id": "p1", - "CPS": 55.430892 + "average_accuracy": 55.403333333333336, + "best_prompt": 55.65, + "prompt_id": "p3", + "CPS": 55.512730000000005 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json b/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json index 15883f234d5b4039d2c458f7d80870b1e6583e8f..862506ffca374fa15a05b2c7383529a42f4480da 100644 --- a/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.84547375, + "average_CPS": 62.99263766666667, "config": { "model_name": "google/medgemma-27b-text-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.67, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.8, + "stderr": 0.0 } ], - "average_accuracy": 56.295, + "average_accuracy": 56.79666666666666, "best_prompt": 58.67, "prompt_id": "p2", - "CPS": 57.2765875 + "CPS": 57.57091533333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json b/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json index b93101faf21531e6fd73ddbbefa27657ffb533b4..4df93fec54df7c07e05c799c2bde973a7e48bf8e 100644 --- a/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 66.5258515, + "average_CPS": 66.161104, "config": { "model_name": "google/medgemma-27b-text-it", "num_fewshot": "10", @@ -46,12 +46,18 @@ "metric": "f1", "value": 62.35000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.26, + "stderr": 0.0 } ], - "average_accuracy": 60.77, + "average_accuracy": 59.6, "best_prompt": 62.35000000000001, "prompt_id": "p2", - "CPS": 61.36487 + "CPS": 60.635375 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json b/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json index 484f18476901461347c20029d764b523f15639d4..687310ea6f0b55027aaed4f5bca0fa501be783b9 100644 --- a/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 64.38433175, + "average_CPS": 64.263205, "config": { "model_name": "google/medgemma-27b-text-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 61.33, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.18, + "stderr": 0.0 } ], - "average_accuracy": 60.364999999999995, + "average_accuracy": 59.97, "best_prompt": 61.33, "prompt_id": "p2", - "CPS": 60.738165499999994 + "CPS": 60.495912000000004 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json b/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json index 6ff6020bc8239293936c81c1ceedeffb517bb34e..b616f992aa614e8edce277364ba149d35bf96e45 100644 --- a/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 61.536083, + "average_CPS": 61.55467333333333, "config": { "model_name": "google/medgemma-27b-text-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 51.88, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.71, + "stderr": 0.0 } ], - "average_accuracy": 51.495000000000005, + "average_accuracy": 51.56666666666666, "best_prompt": 51.88, "prompt_id": "p2", - "CPS": 51.680262 + "CPS": 51.71744266666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json b/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json index 532f06d770ab15eb3fab5a51e5b8da8028ef5876..4024c2b5064f2c6fc30e552dd613b311e8510d90 100644 --- a/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.10114183333333, + "average_CPS": 62.13607933333333, "config": { "model_name": "google/medgemma-27b-text-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.900000000000006, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 54.94, + "stderr": 0.0 } ], - "average_accuracy": 54.565, + "average_accuracy": 54.69, "best_prompt": 55.900000000000006, "prompt_id": "p2", - "CPS": 55.153735 + "CPS": 55.22361 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_EN.json b/e3c_llm_results/google/medgemma-4b-it_10_EN.json index 807748196ed95b23ef9beb177def349cf1649619..3cf5682456197486d183255d45b2a3960a5ed3b3 100644 --- a/e3c_llm_results/google/medgemma-4b-it_10_EN.json +++ b/e3c_llm_results/google/medgemma-4b-it_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 30.937053083333332, + "average_CPS": 31.656783166666663, "config": { "model_name": "google/medgemma-4b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 12.370000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 13.91, + "stderr": 0.0 } ], - "average_accuracy": 11.005, - "best_prompt": 12.370000000000001, - "prompt_id": "p2", - "CPS": 12.2011495 + "average_accuracy": 11.973333333333334, + "best_prompt": 13.91, + "prompt_id": "p3", + "CPS": 13.640609666666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_GR.json b/e3c_llm_results/google/medgemma-4b-it_10_GR.json index 9eb3f40ba2685801aeb92716d13041033aa52942..2c531814bb9d42699d6b76c7abba643e7e4a6095 100644 --- a/e3c_llm_results/google/medgemma-4b-it_10_GR.json +++ b/e3c_llm_results/google/medgemma-4b-it_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 32.84242175, + "average_CPS": 32.8816105, "config": { "model_name": "google/medgemma-4b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 16.05, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 15.509999999999998, + "stderr": 0.0 } ], - "average_accuracy": 14.045, + "average_accuracy": 14.533333333333331, "best_prompt": 16.05, "prompt_id": "p2", - "CPS": 15.7281975 + "CPS": 15.806575 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_IT.json b/e3c_llm_results/google/medgemma-4b-it_10_IT.json index 10aaf80dc8c2c39a4049e39b28ed20897231ba03..7bfa11ea9d723f1f50e9ac88d06cd9eb75050e1b 100644 --- a/e3c_llm_results/google/medgemma-4b-it_10_IT.json +++ b/e3c_llm_results/google/medgemma-4b-it_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 37.09009025, + "average_CPS": 37.088972000000005, "config": { "model_name": "google/medgemma-4b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 19.17, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.51, + "stderr": 0.0 } ], - "average_accuracy": 17.545, + "average_accuracy": 17.533333333333335, "best_prompt": 19.17, "prompt_id": "p2", - "CPS": 18.858487500000003 + "CPS": 18.856251000000004 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_PL.json b/e3c_llm_results/google/medgemma-4b-it_10_PL.json index 36cef39e6ce98b8e2408629ead032012867984b6..8a258a06ac9f7864f3564fb7afc03ccad764f71c 100644 --- a/e3c_llm_results/google/medgemma-4b-it_10_PL.json +++ b/e3c_llm_results/google/medgemma-4b-it_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 31.79935483333333, + "average_CPS": 31.782375333333327, "config": { "model_name": "google/medgemma-4b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 9.969999999999999, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.969999999999999, + "stderr": 0.0 } ], - "average_accuracy": 10.84, + "average_accuracy": 10.549999999999999, "best_prompt": 11.709999999999999, "prompt_id": "p1", - "CPS": 11.608122999999999 + "CPS": 11.574163999999998 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_SK.json b/e3c_llm_results/google/medgemma-4b-it_10_SK.json index b34fccb0376282fa798c99e03b66b0b227e575a0..2df2912042b4e00a4fd588a7a28d8362acff28f8 100644 --- a/e3c_llm_results/google/medgemma-4b-it_10_SK.json +++ b/e3c_llm_results/google/medgemma-4b-it_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 28.988108833333335, + "average_CPS": 28.978618833333336, "config": { "model_name": "google/medgemma-4b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 10.09, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 10.0, + "stderr": 0.0 } ], - "average_accuracy": 10.52, + "average_accuracy": 10.346666666666666, "best_prompt": 10.95, "prompt_id": "p1", - "CPS": 10.902915 + "CPS": 10.883935 } } } \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_SL.json b/e3c_llm_results/google/medgemma-4b-it_10_SL.json index 5e0d57a9c19ecdd26c6dd5e21eafd1bb53e91109..12372701522a392836b27245e1a82d59a6ef1bfe 100644 --- a/e3c_llm_results/google/medgemma-4b-it_10_SL.json +++ b/e3c_llm_results/google/medgemma-4b-it_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 31.314164499999997, + "average_CPS": 32.7709705, "config": { "model_name": "google/medgemma-4b-it", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 11.01, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 15.010000000000002, + "stderr": 0.0 } ], - "average_accuracy": 11.395, - "best_prompt": 11.78, - "prompt_id": "p1", - "CPS": 11.734646999999999 + "average_accuracy": 12.6, + "best_prompt": 15.010000000000002, + "prompt_id": "p3", + "CPS": 14.648259000000001 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json index f344c5490e953ef567e6c28ca7c40b9f4771653c..172cd2593a629873e135edee8b555b1ff5567d9a 100644 --- a/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 30.9929955, + "average_CPS": 30.98751216666667, "config": { "model_name": "microsoft/MediPhi-Clinical", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 10.95, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 11.07, + "stderr": 0.0 } ], - "average_accuracy": 11.35, + "average_accuracy": 11.256666666666666, "best_prompt": 11.75, "prompt_id": "p1", - "CPS": 11.703 + "CPS": 11.692033333333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json index 5b87eebc7064e75682cca17d008cdafbb96e0343..7cc2471b33b1a5ca97d09a4142e483c5b70e63e0 100644 --- a/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 20.344995166666667, + "average_CPS": 20.501029666666668, "config": { "model_name": "microsoft/MediPhi-Clinical", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 6.81, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.109999999999999, + "stderr": 0.0 } ], - "average_accuracy": 5.54, - "best_prompt": 6.81, - "prompt_id": "p2", - "CPS": 6.723513 + "average_accuracy": 6.063333333333333, + "best_prompt": 7.109999999999999, + "prompt_id": "p3", + "CPS": 7.035582 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json index e5cde9d20753a875bd2ceea41c35bb0ed36c6927..ab066b177276ec685bb3bf8c7d615f274fd7b17a 100644 --- a/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 36.3490175, + "average_CPS": 36.308323, "config": { "model_name": "microsoft/MediPhi-Clinical", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 9.610000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.219999999999999, + "stderr": 0.0 } ], - "average_accuracy": 15.375, + "average_accuracy": 14.99, "best_prompt": 21.14, "prompt_id": "p1", - "CPS": 19.921279000000002 + "CPS": 19.83989 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json index 1f4ddef202d128bacc0fb89d6ac6c5ac8b19d406..89a2aff72039f7036b3a0d1bb05567a8843aef7a 100644 --- a/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 26.618304, + "average_CPS": 28.317504, "config": { "model_name": "microsoft/MediPhi-Clinical", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 12.07, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.36, + "stderr": 0.0 } ], - "average_accuracy": 12.31, - "best_prompt": 12.55, - "prompt_id": "p1", - "CPS": 12.51988 + "average_accuracy": 13.660000000000002, + "best_prompt": 16.36, + "prompt_id": "p3", + "CPS": 15.91828 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json index a476b0b5b1655bcf1a1d266712f05f09d32a56f7..43aa5ad9534745978a76d5a8f0df5e3ef8ae7cc4 100644 --- a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 23.377642833333333, + "average_CPS": 23.942156333333333, "config": { "model_name": "microsoft/MediPhi-Clinical", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 6.0600000000000005, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.24, + "stderr": 0.0 } ], - "average_accuracy": 5.575, - "best_prompt": 6.0600000000000005, - "prompt_id": "p2", - "CPS": 6.030609 + "average_accuracy": 6.13, + "best_prompt": 7.24, + "prompt_id": "p3", + "CPS": 7.159636 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json index 3fd457471a1622134fdbbcf9dcf3640eb3f5b25b..10336f2da374f0d4e3497ca52a60c3e897867991 100644 --- a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 24.261933583333334, + "average_CPS": 24.260621, "config": { "model_name": "microsoft/MediPhi-Clinical", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 6.74, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.42, + "stderr": 0.0 } ], - "average_accuracy": 7.515000000000001, + "average_accuracy": 7.483333333333334, "best_prompt": 8.290000000000001, "prompt_id": "p1", - "CPS": 8.2257525 + "CPS": 8.223127333333334 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json index cb974e94763965f9431ddda2a910ecf491835ed0..d35e2e7548790ba37050b7b296ad22e195892857 100644 --- a/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 35.63120666666667, + "average_CPS": 35.686588, "config": { "model_name": "microsoft/MediPhi-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 18.88, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 18.360000000000003, + "stderr": 0.0 } ], - "average_accuracy": 16.6, + "average_accuracy": 17.186666666666667, "best_prompt": 18.88, "prompt_id": "p2", - "CPS": 18.449536000000002 + "CPS": 18.560298666666665 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json index 149452159a6757c747d997062330a34cc2d7812d..746a4d6809495e6b0b81e7ed14d30b71aba5f6ef 100644 --- a/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 18.171546, + "average_CPS": 18.663691, "config": { "model_name": "microsoft/MediPhi-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 6.74, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.7700000000000005, + "stderr": 0.0 } ], - "average_accuracy": 6.25, - "best_prompt": 6.74, - "prompt_id": "p2", - "CPS": 6.706974 + "average_accuracy": 6.756666666666667, + "best_prompt": 7.7700000000000005, + "prompt_id": "p3", + "CPS": 7.691264 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json index 24442aca65d8939081dc0f4f5b56077f2ff2a346..df3be99350ebf1b022cb709ae59908e2e428285d 100644 --- a/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 42.6923105, + "average_CPS": 42.82063783333333, "config": { "model_name": "microsoft/MediPhi-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 23.07, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.58, + "stderr": 0.0 } ], - "average_accuracy": 25.9, + "average_accuracy": 26.793333333333333, "best_prompt": 28.73, "prompt_id": "p1", - "CPS": 27.916941 + "CPS": 28.173595666666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json index ac551a4ccca8c6a3595810d2b8eeb92e3c63c894..53e1c256501493c36d30438e28047eda273d3b40 100644 --- a/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 35.0467105, + "average_CPS": 35.295837, "config": { "model_name": "microsoft/MediPhi-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 26.86, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 26.619999999999997, + "stderr": 0.0 } ], - "average_accuracy": 21.055, + "average_accuracy": 22.909999999999997, "best_prompt": 26.86, "prompt_id": "p2", - "CPS": 25.300776999999997 + "CPS": 25.79903 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json index 1e5bb73452f7a115703b74777cc6aa212671bae2..ca13dfd0624a5d2a282043f03582c65fef1f6743 100644 --- a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 28.277421583333332, + "average_CPS": 28.662679833333332, "config": { "model_name": "microsoft/MediPhi-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 13.950000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.729999999999999, + "stderr": 0.0 } ], - "average_accuracy": 12.325, - "best_prompt": 13.950000000000001, - "prompt_id": "p2", - "CPS": 13.7233125 + "average_accuracy": 13.126666666666665, + "best_prompt": 14.729999999999999, + "prompt_id": "p3", + "CPS": 14.493828999999998 } } } \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json index 9c94f5e10660da5c8297852f6617cee1bc56e3b4..c6ee1d145af7f243f04d69dd9e274bafc5e23b01 100644 --- a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 26.81930283333333, + "average_CPS": 29.24573433333333, "config": { "model_name": "microsoft/MediPhi-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 14.680000000000001, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 20.27, + "stderr": 0.0 } ], - "average_accuracy": 13.115000000000002, - "best_prompt": 14.680000000000001, - "prompt_id": "p2", - "CPS": 14.450258000000002 + "average_accuracy": 15.5, + "best_prompt": 20.27, + "prompt_id": "p3", + "CPS": 19.303121 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json index b4d5c267aa9fdedd0c8300bdec145f28a6cddb3b..901677072102f05031732c689b4caaa13f0f844c 100644 --- a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 42.61762233333333, + "average_CPS": 43.4870355, "config": { "model_name": "mistralai/Mistral-7B-Instruct-v0.2", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 40.339999999999996, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.48, + "stderr": 0.0 } ], - "average_accuracy": 33.135, - "best_prompt": 40.339999999999996, - "prompt_id": "p2", - "CPS": 37.433503 + "average_accuracy": 35.916666666666664, + "best_prompt": 41.48, + "prompt_id": "p3", + "CPS": 39.17232933333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json index 1d8702b1dea6caa05a6f0095e103342e26caa583..c781ff1f482edfb2dc8d1f751eae772debd3267f 100644 --- a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 29.018154000000003, + "average_CPS": 29.209499, "config": { "model_name": "mistralai/Mistral-7B-Instruct-v0.2", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 23.43, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 21.89, + "stderr": 0.0 } ], - "average_accuracy": 16.99, + "average_accuracy": 18.62333333333333, "best_prompt": 23.43, "prompt_id": "p2", - "CPS": 21.921108 + "CPS": 22.303798 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json index f294ab11c5ff34468871a1d005672671f8abd5f2..517fbc098c7e944fdca6ef2137b37251ff6f5fdc 100644 --- a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 47.034969, + "average_CPS": 47.367924, "config": { "model_name": "mistralai/Mistral-7B-Instruct-v0.2", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 45.300000000000004, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.52, + "stderr": 0.0 } ], - "average_accuracy": 38.11, + "average_accuracy": 39.580000000000005, "best_prompt": 45.300000000000004, "prompt_id": "p2", - "CPS": 42.04293 + "CPS": 42.70884 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json index f02babaae462403cb8fb4c5c3844604d6e868c2f..75e218546bbd71959c63f68f07b2b9178d4f2ac6 100644 --- a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 43.90926, + "average_CPS": 43.31202666666667, "config": { "model_name": "mistralai/Mistral-7B-Instruct-v0.2", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 33.11, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 26.83, + "stderr": 0.0 } ], - "average_accuracy": 36.03, + "average_accuracy": 32.96333333333333, "best_prompt": 38.95, "prompt_id": "p1", - "CPS": 37.81266 + "CPS": 36.61819333333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json index 26c3ccc0f04ecebef572fed9d721e222ecf46582..9989c0a065bd29a74bb0b47f1a054aff266bd0a9 100644 --- a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 30.650676250000004, + "average_CPS": 31.267611000000002, "config": { "model_name": "mistralai/Mistral-7B-Instruct-v0.2", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 19.48, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 22.93, + "stderr": 0.0 } ], - "average_accuracy": 20.515, - "best_prompt": 21.55, - "prompt_id": "p1", - "CPS": 21.326957500000002 + "average_accuracy": 21.32, + "best_prompt": 22.93, + "prompt_id": "p3", + "CPS": 22.560827 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json index 06ad9bf9476245acfbc22e97399b4d4521eb42c0..d06da082bdd17139af094b07794fcd5c2a9c15ff 100644 --- a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 30.92908, + "average_CPS": 31.471755, "config": { "model_name": "mistralai/Mistral-7B-Instruct-v0.2", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 19.5, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 21.15, + "stderr": 0.0 } ], - "average_accuracy": 19.700000000000003, - "best_prompt": 19.900000000000002, - "prompt_id": "p1", - "CPS": 19.860200000000003 + "average_accuracy": 20.183333333333334, + "best_prompt": 21.15, + "prompt_id": "p3", + "CPS": 20.94555 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json index 040b0ac7667da1f2f949a6ea3df406276253933d..6766f39c7bd90b083a82b24cc52307680638cfa3 100644 --- a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 52.1037285, + "average_CPS": 52.2740005, "config": { "model_name": "mistralai/Mistral-Nemo-Instruct-2407", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 50.080000000000005, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 44.49, + "stderr": 0.0 } ], - "average_accuracy": 42.45, + "average_accuracy": 43.13, "best_prompt": 50.080000000000005, "prompt_id": "p2", - "CPS": 46.258896 + "CPS": 46.59944 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json index cc4ef5b88c658086e726d65bb822ff7ee1f2ab9d..9fdec97b12d8d1492873635e1bfeebdb91048e33 100644 --- a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 36.652986500000004, + "average_CPS": 40.65579733333333, "config": { "model_name": "mistralai/Mistral-Nemo-Instruct-2407", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 22.96, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 33.23, + "stderr": 0.0 } ], - "average_accuracy": 21.625, - "best_prompt": 22.96, - "prompt_id": "p2", - "CPS": 22.653484000000002 + "average_accuracy": 25.49333333333333, + "best_prompt": 33.23, + "prompt_id": "p3", + "CPS": 30.65910566666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json index dfefd924d7c5cf422a1f35cded759cac49d4c45a..08e21b80d8341ace471b2f0bd68d9635b281304d 100644 --- a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 51.30399291666667, + "average_CPS": 51.6158585, "config": { "model_name": "mistralai/Mistral-Nemo-Instruct-2407", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 40.99, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.6, + "stderr": 0.0 } ], - "average_accuracy": 34.035, + "average_accuracy": 35.556666666666665, "best_prompt": 40.99, "prompt_id": "p2", - "CPS": 38.139145500000005 + "CPS": 38.76287666666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json index 4206fa687f230ece8058b8c1b4ae59d4f1167b0c..0858eae30fded0fd8fe8dae0f7e8da84b853d41c 100644 --- a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 36.291591, + "average_CPS": 36.890603, "config": { "model_name": "mistralai/Mistral-Nemo-Instruct-2407", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 18.55, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 20.01, + "stderr": 0.0 } ], - "average_accuracy": 18.59, - "best_prompt": 18.63, - "prompt_id": "p1", - "CPS": 18.622548 + "average_accuracy": 19.063333333333333, + "best_prompt": 20.01, + "prompt_id": "p3", + "CPS": 19.820572 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json index 8c45c7dce219c11c5835177a0fef0e3ce5e7fd57..c9cf698e283f1f943afe8db6c23ba7b85f5f8a00 100644 --- a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 35.501746499999996, + "average_CPS": 35.643439, "config": { "model_name": "mistralai/Mistral-Nemo-Instruct-2407", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 21.66, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 20.94, + "stderr": 0.0 } ], - "average_accuracy": 17.015, + "average_accuracy": 18.323333333333334, "best_prompt": 21.66, "prompt_id": "p2", - "CPS": 20.653893 + "CPS": 20.937278 } } } \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json index f87c68055b57f56199312bffa491c2af51c48c57..649e78cc8e120af115d138076515e54552cf01f5 100644 --- a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 36.59565525, + "average_CPS": 36.596855166666664, "config": { "model_name": "mistralai/Mistral-Nemo-Instruct-2407", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 20.57, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.27, + "stderr": 0.0 } ], - "average_accuracy": 17.235, + "average_accuracy": 17.246666666666666, "best_prompt": 20.57, "prompt_id": "p2", - "CPS": 19.8839905 + "CPS": 19.886390333333335 } } } \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json index 747bcb29604ec0e57014eebaa167529d276d4cb3..23950e8783aacd0b8a8ec07000e5a327b6aa8f2c 100644 --- a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.2351185, + "average_CPS": 55.751357999999996, "config": { "model_name": "tiiuae/Falcon3-10B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.86, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.15, + "stderr": 0.0 } ], - "average_accuracy": 49.605000000000004, + "average_accuracy": 51.45333333333334, "best_prompt": 55.86, "prompt_id": "p2", - "CPS": 52.365957 + "CPS": 53.398436000000004 } } } \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json index e3fb23fceb939e2d23d052f58cb289fc04356f7d..6b1b7aec32ec1dd607ebf47e8a0831d9666895a2 100644 --- a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 36.66668416666666, + "average_CPS": 36.36378083333333, "config": { "model_name": "tiiuae/Falcon3-10B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 37.55, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.68, + "stderr": 0.0 } ], - "average_accuracy": 37.519999999999996, + "average_accuracy": 35.906666666666666, "best_prompt": 37.55, "prompt_id": "p2", - "CPS": 37.538734999999996 + "CPS": 36.93292833333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json index 5bdf27224c5e6d54413ebd78381dab3272d8a4d7..63e48dbad5f85f4bb69b03b6bd5ae53b7fb7e6c6 100644 --- a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 54.68382, + "average_CPS": 55.480365666666664, "config": { "model_name": "tiiuae/Falcon3-10B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 54.58, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.97, + "stderr": 0.0 } ], - "average_accuracy": 50.4, - "best_prompt": 54.58, - "prompt_id": "p2", - "CPS": 52.298556 + "average_accuracy": 52.25666666666666, + "best_prompt": 55.97, + "prompt_id": "p3", + "CPS": 53.89164733333333 } } } \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json index 6a8fd7dcc417e06e14b79cdf6dd14de9c3745219..47845f40149bf7b1ea6d461838c06615f65e1825 100644 --- a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 48.62956716666666, + "average_CPS": 48.75862866666667, "config": { "model_name": "tiiuae/Falcon3-10B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 55.71, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 54.89000000000001, + "stderr": 0.0 } ], - "average_accuracy": 53.5, + "average_accuracy": 53.96333333333334, "best_prompt": 55.71, "prompt_id": "p2", - "CPS": 54.478809 + "CPS": 54.736932 } } } \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json index 6dbca7b6d5cf492013ee7048f39596c99d55f411..035aa9f55baa6e42440e9f396d4557a58d41b12b 100644 --- a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 44.76583875, + "average_CPS": 44.8562175, "config": { "model_name": "tiiuae/Falcon3-10B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 46.949999999999996, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.38, + "stderr": 0.0 } ], - "average_accuracy": 42.224999999999994, + "average_accuracy": 42.61, "best_prompt": 46.949999999999996, "prompt_id": "p2", - "CPS": 44.7316125 + "CPS": 44.912369999999996 } } } \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json index cd737fe4a1c1052ceecbdd7fb274d4327e6b42ad..bc1d01f3f803fdbb9dbefcfdea59c98d748cf150 100644 --- a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 35.00057433333333, + "average_CPS": 38.88441916666667, "config": { "model_name": "tiiuae/Falcon3-10B-Instruct", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 30.12, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.63, + "stderr": 0.0 } ], - "average_accuracy": 26.675, - "best_prompt": 30.12, - "prompt_id": "p2", - "CPS": 29.082366 + "average_accuracy": 31.326666666666668, + "best_prompt": 40.63, + "prompt_id": "p3", + "CPS": 36.85005566666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_EN.json b/e3c_llm_results/unsloth/phi-4_10_EN.json index eb0adde58a2ebdd3f982acfc747b7b4141139e11..2eca9798eb6ce904c24156d5df79987b7590d523 100644 --- a/e3c_llm_results/unsloth/phi-4_10_EN.json +++ b/e3c_llm_results/unsloth/phi-4_10_EN.json @@ -1,5 +1,5 @@ { - "average_CPS": 57.3466435, + "average_CPS": 57.6138785, "config": { "model_name": "unsloth/phi-4", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 56.26, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.54, + "stderr": 0.0 } ], - "average_accuracy": 52.69, + "average_accuracy": 53.64000000000001, "best_prompt": 56.26, "prompt_id": "p2", - "CPS": 54.251518 + "CPS": 54.785988 } } } \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_GR.json b/e3c_llm_results/unsloth/phi-4_10_GR.json index b5f9c45022f9b3c80b616f9bcd5699076c1b67ce..484557f271eb1144b8930243fdc0e9e5e56a0a5b 100644 --- a/e3c_llm_results/unsloth/phi-4_10_GR.json +++ b/e3c_llm_results/unsloth/phi-4_10_GR.json @@ -1,5 +1,5 @@ { - "average_CPS": 54.36022816666667, + "average_CPS": 55.776253, "config": { "model_name": "unsloth/phi-4", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 52.61, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 56.779999999999994, + "stderr": 0.0 } ], - "average_accuracy": 50.980000000000004, - "best_prompt": 52.61, - "prompt_id": "p2", - "CPS": 51.752457 + "average_accuracy": 52.913333333333334, + "best_prompt": 56.779999999999994, + "prompt_id": "p3", + "CPS": 54.58450666666666 } } } \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_IT.json b/e3c_llm_results/unsloth/phi-4_10_IT.json index c283cbddc646329830c9e162df8c39c84b103619..f5f563b3c379658ff5dbd08e0e2083c1ff6a853b 100644 --- a/e3c_llm_results/unsloth/phi-4_10_IT.json +++ b/e3c_llm_results/unsloth/phi-4_10_IT.json @@ -1,5 +1,5 @@ { - "average_CPS": 62.7994975, + "average_CPS": 62.7742775, "config": { "model_name": "unsloth/phi-4", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 58.199999999999996, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 56.879999999999995, + "stderr": 0.0 } ], - "average_accuracy": 57.14, + "average_accuracy": 57.053333333333335, "best_prompt": 58.199999999999996, "prompt_id": "p2", - "CPS": 57.58308 + "CPS": 57.53264 } } } \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_PL.json b/e3c_llm_results/unsloth/phi-4_10_PL.json index 3176e3d74c97d71f538a74afc95dc050bb323baa..0f726576d99c14670bfd06a98251a5c7c9a315d2 100644 --- a/e3c_llm_results/unsloth/phi-4_10_PL.json +++ b/e3c_llm_results/unsloth/phi-4_10_PL.json @@ -1,5 +1,5 @@ { - "average_CPS": 55.851632499999994, + "average_CPS": 56.63946383333333, "config": { "model_name": "unsloth/phi-4", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 57.599999999999994, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.72, + "stderr": 0.0 } ], - "average_accuracy": 55.915, - "best_prompt": 57.599999999999994, - "prompt_id": "p2", - "CPS": 56.62944 + "average_accuracy": 57.18333333333334, + "best_prompt": 59.72, + "prompt_id": "p3", + "CPS": 58.20510266666667 } } } \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_SK.json b/e3c_llm_results/unsloth/phi-4_10_SK.json index daddc042c466749fdbab28df910892ba9607541b..eae9b8f7dc903b7485ef003c47312654ce4c5036 100644 --- a/e3c_llm_results/unsloth/phi-4_10_SK.json +++ b/e3c_llm_results/unsloth/phi-4_10_SK.json @@ -1,5 +1,5 @@ { - "average_CPS": 53.08822666666667, + "average_CPS": 54.49931766666667, "config": { "model_name": "unsloth/phi-4", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 49.94, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.410000000000004, + "stderr": 0.0 } ], - "average_accuracy": 50.5, - "best_prompt": 51.06, - "prompt_id": "p1", - "CPS": 50.774064 + "average_accuracy": 52.13666666666666, + "best_prompt": 55.410000000000004, + "prompt_id": "p3", + "CPS": 53.596246 } } } \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_SL.json b/e3c_llm_results/unsloth/phi-4_10_SL.json index 1b84e88b895fcc0115b87c4d3644b7bc0ac09151..78e30ae98107c63ec0916e5a5b8d272498025244 100644 --- a/e3c_llm_results/unsloth/phi-4_10_SL.json +++ b/e3c_llm_results/unsloth/phi-4_10_SL.json @@ -1,5 +1,5 @@ { - "average_CPS": 53.913512000000004, + "average_CPS": 55.04669683333333, "config": { "model_name": "unsloth/phi-4", "num_fewshot": "10", @@ -52,12 +52,18 @@ "metric": "f1", "value": 52.32, "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.78999999999999, + "stderr": 0.0 } ], - "average_accuracy": 51.745000000000005, - "best_prompt": 52.32, - "prompt_id": "p2", - "CPS": 52.01916000000001 + "average_accuracy": 53.093333333333334, + "best_prompt": 55.78999999999999, + "prompt_id": "p3", + "CPS": 54.28552966666666 } } } \ No newline at end of file diff --git a/src/display/utils.py b/src/display/utils.py index 2b8ba28ffc154cd5b3b89f3adb27b0a73d6ea1ed..d1e82295f58d19e5b01ee13c870ee66e7fc2dc5c 100644 --- a/src/display/utils.py +++ b/src/display/utils.py @@ -103,7 +103,7 @@ class FewShotDetails: class FewShotType(Enum): ZS = FewShotDetails(name="zero-shot", symbol="0️⃣") - FS = FewShotDetails(name="5-few-shot", symbol="5️⃣") + FS = FewShotDetails(name="5-few-shot", symbol="🔟") Unknown = FewShotDetails(name="unknown", symbol="❓") def to_str(self, separator=" "):