This page presents the results of the FLaME evaluation across various financial NLP tasks. Each tab shows performance metrics for a different task category.
## Overall Performance Across All Tasks
The 20 datasets fall into six task categories, each scored with the metric listed below; a brief sketch of how such metrics can be computed follows the results table.

| Task Category | Datasets | Metric |
|---|---|---|
| Information Retrieval | FiNER, FR, RD, FNXL, FE | F1 Score |
| Sentiment Analysis | FiQA, SQA, FPB | MSE (FiQA); F1 Score (SQA, FPB) |
| Causal Analysis | CD, CC | F1 Score |
| Text Classification | B77, FB, FOMC, NC, HL | F1 Score |
| Question Answering | CFQA, FinQA, TQA | Accuracy |
| Summarization | ECTSum, EDTSum | BERTScore F1 |

| Model | FiNER | FR | RD | FNXL | FE | FiQA | SQA | FPB | CD | CC | B77 | FB | FOMC | NC | HL | CFQA | FinQA | TQA | ECTSum | EDTSum |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Llama 3 70B Instruct | .701 | .332 | .883 | .020 | .469 | .123 | .535 | .902 | .142 | .192 | .645 | .309 | .652 | .386 | .811 | .709 | .809 | .772 | .754 | .817 |
| Llama 3 8B Instruct | .565 | .289 | .705 | .003 | .350 | .161 | .600 | .698 | .049 | .234 | .512 | .659 | .497 | .511 | .763 | .268 | .767 | .706 | .757 | .811 |
| DBRX Instruct | .489 | .304 | .778 | .009 | .006 | .160 | .436 | .499 | .087 | .231 | .574 | .483 | .193 | .319 | .746 | .252 | .738 | .633 | .729 | .806 |
| DeepSeek LLM (67B) | .745 | .334 | .879 | .007 | .416 | .118 | .462 | .811 | .025 | .193 | .578 | .492 | .407 | .151 | .778 | .174 | .742 | .355 | .681 | .807 |
| Gemma 2 27B | .761 | .356 | .902 | .006 | .298 | .100 | .515 | .884 | .133 | .242 | .621 | .538 | .620 | .408 | .808 | .268 | .768 | .734 | .723 | .814 |
| Gemma 2 9B | .651 | .331 | .892 | .005 | .367 | .189 | .491 | .940 | .105 | .207 | .609 | .541 | .519 | .365 | .856 | .292 | .779 | .750 | .585 | .817 |
| Mistral (7B) Instruct v0.3 | .526 | .276 | .771 | .004 | .368 | .135 | .522 | .841 | .052 | .227 | .528 | .503 | .542 | .412 | .779 | .199 | .655 | .553 | .750 | .811 |
| Mixtral-8x22B Instruct | .635 | .367 | .811 | .009 | .435 | .221 | .510 | .776 | .125 | .308 | .602 | .221 | .465 | .513 | .835 | .285 | .766 | .666 | .758 | .815 |
| Mixtral-8x7B Instruct | .598 | .282 | .845 | .009 | .267 | .208 | .498 | .893 | .055 | .229 | .547 | .396 | .603 | .583 | .805 | .315 | .611 | .501 | .747 | .810 |
| Qwen 2 Instruct (72B) | .748 | .348 | .854 | .012 | .483 | .205 | .576 | .901 | .190 | .184 | .627 | .495 | .605 | .639 | .830 | .269 | .819 | .715 | .752 | .811 |
| WizardLM-2 8x22B | .744 | .355 | .852 | .008 | .226 | .129 | .566 | .779 | .114 | .201 | .648 | .500 | .505 | .272 | .797 | .247 | .796 | .725 | .735 | .808 |
| DeepSeek-V3 | .790 | .437 | .934 | .045 | .549 | .150 | .583 | .814 | .198 | .170 | .714 | .487 | .578 | .675 | .729 | .261 | .840 | .779 | .750 | .815 |
| DeepSeek R1 | .807 | .393 | .952 | .057 | .587 | .110 | .499 | .902 | .337 | .202 | .763 | .419 | .670 | .688 | .769 | .853 | .836 | .858 | .759 | .804 |
| QwQ-32B-Preview | .685 | .270 | .656 | .001 | .005 | .141 | .550 | .815 | .131 | .220 | .613 | .784 | .555 | .020 | .744 | .282 | .793 | .796 | .696 | .817 |
| Jamba 1.5 Mini | .552 | .284 | .844 | .005 | .132 | .119 | .418 | .765 | .043 | .270 | .508 | .898 | .499 | .151 | .682 | .218 | .666 | .586 | .741 | .816 |
| Jamba 1.5 Large | .693 | .341 | .862 | .005 | .397 | .183 | .582 | .798 | .074 | .176 | .628 | .618 | .550 | .541 | .782 | .225 | .790 | .660 | .734 | .818 |
| Claude 3.5 Sonnet | .799 | .439 | .891 | .047 | .655 | .101 | .553 | .944 | .196 | .197 | .668 | .634 | .674 | .692 | .827 | .402 | .844 | .700 | .767 | .813 |
| Claude 3 Haiku | .711 | .285 | .883 | .015 | .494 | .167 | .463 | .908 | .081 | .200 | .622 | .022 | .631 | .558 | .781 | .421 | .803 | .733 | .646 | .808 |
| Cohere Command R 7B | .748 | .194 | .845 | .018 | .441 | .164 | .532 | .840 | .057 | .255 | .516 | .762 | .459 | .068 | .770 | .212 | .709 | .716 | .750 | .815 |
| Cohere Command R+ | .756 | .333 | .922 | .021 | .452 | .106 | .533 | .699 | .080 | .238 | .651 | .684 | .393 | .118 | .812 | .259 | .776 | .698 | .751 | .810 |
| Google Gemini 1.5 Pro | .712 | .374 | .944 | .019 | .393 | .144 | .593 | .885 | .196 | .217 | .418 | .336 | .579 | .525 | .837 | .280 | .829 | .763 | .777 | .817 |
| OpenAI gpt-4o | .766 | .399 | .942 | .037 | .523 | .184 | .541 | .928 | .130 | .222 | .710 | .524 | .664 | .750 | .824 | .749 | .836 | .754 | .773 | .816 |
| OpenAI o1-mini | .761 | .403 | .876 | .010 | .662 | .120 | .542 | .917 | .289 | .209 | .670 | .612 | .635 | .720 | .769 | .840 | .799 | .698 | .763 | .816 |
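
For context on the metric columns, the sketch below shows one way scores like these could be computed with scikit-learn and the `bert-score` package. It is a minimal illustration under assumed inputs and settings (e.g. weighted F1 averaging), not the FLaME evaluation pipeline itself.

```python
# Illustrative sketch only (not the FLaME evaluation code); example inputs are made up.
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
from bert_score import score as bert_score  # pip install bert-score

# Label-prediction tasks (e.g. FPB, FOMC, B77): F1 over predicted vs. gold labels.
gold = ["positive", "negative", "neutral", "positive"]
pred = ["positive", "neutral", "neutral", "positive"]
print("F1 Score:", f1_score(gold, pred, average="weighted"))

# Question answering (e.g. FinQA): exact-match accuracy over final answers.
print("Accuracy:", accuracy_score(["2.5%", "1.2"], ["2.5%", "1.3"]))

# FiQA sentiment scoring: mean squared error between predicted and gold scores.
print("MSE:", mean_squared_error([0.6, -0.3], [0.5, -0.1]))

# Summarization (ECTSum, EDTSum): BERTScore F1 between generated and reference summaries.
P, R, F1 = bert_score(["Revenue grew 10% year over year."],
                      ["The company reported 10% revenue growth."], lang="en")
print("BERTScore F1:", F1.mean().item())
```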