| | <!doctype html> |
| | <html lang="en"> |
| |
|
| | <head> |
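| | <meta content="text/html; charset=UTF-8" http-equiv="content-type"> |
| | <title>Evaluating Generalist LLMs on Legal Knowledge Tasks: A Comprehensive Benchmark Integrating Bloom’s Taxonomy on IT Regulations</title> |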
| | <style type="text/css"> |
| | .lst-kix_list_5-1>li { |
| | counter-increment: lst-ctn-kix_list_5-1 |
| | } |
| | |
| | ul.lst-kix_list_1-0 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_3-0>li:before { |
| | content: "\0025cf " |
| | } |
| | |
| | ul.lst-kix_list_5-7 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_5-8 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_3-1>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_3-2>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_5-5 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_5-6 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_5-0>li { |
| | counter-increment: lst-ctn-kix_list_5-0 |
| | } |
| | |
| | ul.lst-kix_list_1-3 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_3-5>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_1-4 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_1-1 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_3-4>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_1-2 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_5-3 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_1-7 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_3-3>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_5-4 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_1-8 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_1-5 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_5-2 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_1-6 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_3-8>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_3-6>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_3-7>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ol.lst-kix_list_5-0.start { |
| | counter-reset: lst-ctn-kix_list_5-0 0 |
| | } |
| | |
| | .lst-kix_list_5-0>li:before { |
| | content: "" counter(lst-ctn-kix_list_5-0, decimal) ". " |
| | } |
| | |
| | .lst-kix_list_4-8>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_5-3>li:before { |
| | content: "o " |
| | } |
| | |
| | .lst-kix_list_4-7>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_5-2>li:before { |
| | content: "\0025cf " |
| | } |
| | |
| | .lst-kix_list_5-1>li:before { |
| | content: "" counter(lst-ctn-kix_list_5-0, decimal) "." counter(lst-ctn-kix_list_5-1, decimal) " " |
| | } |
| | |
| | li.li-bullet-6:before { |
| | margin-left: -18pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 18pt |
| | } |
| | |
| | li.li-bullet-7:before { |
| | margin-left: -20.3pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 20.3pt |
| | } |
| | |
| | ul.lst-kix_list_4-8 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_5-7>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_4-6 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_5-6>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_5-8>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_4-7 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_4-0 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_4-1 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_5-4>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_4-4 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_5-5>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_4-5 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_4-2 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_4-3 { |
| | list-style-type: none |
| | } |
| | |
| | li.li-bullet-2:before { |
| | margin-left: -20.1pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 20.1pt |
| | } |
| | |
| | ol.lst-kix_list_5-0 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_2-6>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_2-7>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ol.lst-kix_list_5-1 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_2-4>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_2-5>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_2-8>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_3-7 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-8 { |
| | list-style-type: none |
| | } |
| | |
| | ol.lst-kix_list_5-1.start { |
| | counter-reset: lst-ctn-kix_list_5-1 0 |
| | } |
| | |
| | ul.lst-kix_list_3-1 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-2 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-0 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-5 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-6 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-3 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_3-4 { |
| | list-style-type: none |
| | } |
| | |
| | li.li-bullet-1:before { |
| | margin-left: -18pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 18pt |
| | } |
| | |
| | .lst-kix_list_4-0>li:before { |
| | content: "\0025cf " |
| | } |
| | |
| | .lst-kix_list_4-1>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | li.li-bullet-3:before { |
| | margin-left: -20.1pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 20.1pt |
| | } |
| | |
| | .lst-kix_list_4-4>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_4-3>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_4-5>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_4-2>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_4-6>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | li.li-bullet-8:before { |
| | margin-left: -13.4pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 13.4pt |
| | } |
| | |
| | li.li-bullet-9:before { |
| | margin-left: -13.4pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 13.4pt |
| | } |
| | |
| | li.li-bullet-5:before { |
| | margin-left: -18pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 18pt |
| | } |
| | |
| | ul.lst-kix_list_2-8 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_2-2 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_1-0>li:before { |
| | content: "\0025cf " |
| | } |
| | |
| | ul.lst-kix_list_2-3 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_2-0 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_2-1 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_2-6 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_1-1>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_1-2>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ul.lst-kix_list_2-7 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_2-4 { |
| | list-style-type: none |
| | } |
| | |
| | ul.lst-kix_list_2-5 { |
| | list-style-type: none |
| | } |
| | |
| | .lst-kix_list_1-3>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_1-4>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_1-7>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_1-5>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_1-6>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | li.li-bullet-0:before { |
| | margin-left: -12pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 12pt |
| | } |
| | |
| | li.li-bullet-4:before { |
| | margin-left: -20.1pt; |
| | white-space: nowrap; |
| | display: inline-block; |
| | min-width: 20.1pt |
| | } |
| | |
| | .lst-kix_list_2-0>li:before { |
| | content: "\0025cf " |
| | } |
| | |
| | .lst-kix_list_2-1>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_1-8>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_2-2>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | .lst-kix_list_2-3>li:before { |
| | content: "\002022 " |
| | } |
| | |
| | ol { |
| | margin: 0; |
| | padding: 0 |
| | } |
| | |
| | table td, |
| | table th { |
| | padding: 0 |
| | } |
| | |
| | .c10 { |
| | margin-left: 79pt; |
| | padding-top: 0.4pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 31.4pt |
| | } |
| | |
| | .c8 { |
| | color: #000000; |
| | font-weight: 400; |
| | text-decoration: none; |
| | vertical-align: baseline; |
| | font-size: 10pt; |
| | font-family: "Times New Roman"; |
| | font-style: normal |
| | } |
| | |
| | .c30 { |
| | margin-left: 79pt; |
| | padding-top: 0.1pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9874999999999999; |
| | text-align: left; |
| | margin-right: 3.1pt |
| | } |
| | |
| | .c20 { |
| | margin-left: 79pt; |
| | padding-top: 0.3pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 33.4pt |
| | } |
| | |
| | .c1 { |
| | color: #000000; |
| | font-weight: 400; |
| | text-decoration: none; |
| | vertical-align: baseline; |
| | font-size: 12pt; |
| | font-family: "Times New Roman"; |
| | font-style: normal |
| | } |
| | |
| | .c51 { |
| | margin-left: 79pt; |
| | padding-top: 0.3pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 45.3pt |
| | } |
| | |
| | .c78 { |
| | margin-left: 79pt; |
| | padding-top: 0.1pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 18.1pt |
| | } |
| | |
| | .c74 { |
| | margin-left: 43pt; |
| | padding-top: 13.9pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | margin-right: 15.5pt |
| | } |
| | |
| | .c73 { |
| | margin-left: 79pt; |
| | padding-top: 0.3pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 43.8pt |
| | } |
| | |
| | .c25 { |
| | margin-left: 79pt; |
| | padding-top: 0.3pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 39.5pt |
| | } |
| | |
| | .c79 { |
| | margin-left: 79pt; |
| | padding-top: 0.1pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 28.6pt |
| | } |
| | |
| | .c45 { |
| | margin-left: 79pt; |
| | padding-top: 4.2pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 9.2pt |
| | } |
| | |
| | .c12 { |
| | margin-left: 79pt; |
| | padding-top: 0.4pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 8.7pt |
| | } |
| | |
| | .c6 { |
| | color: #000000; |
| | font-weight: 700; |
| | text-decoration: none; |
| | vertical-align: baseline; |
| | font-size: 13.5pt; |
| | font-family: "Times New Roman"; |
| | font-style: normal |
| | } |
| | |
| | .c17 { |
| | margin-left: 43pt; |
| | padding-top: 0.1pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | margin-right: 38pt |
| | } |
| | |
| | .c54 { |
| | margin-left: 79pt; |
| | padding-top: 0.4pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left; |
| | margin-right: 36.8pt |
| | } |
| | |
| | .c47 { |
| | margin-left: 43pt; |
| | padding-top: 3.8pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | margin-right: 11.2pt |
| | } |
| | |
| | .c19 { |
| | margin-left: 43pt; |
| | padding-top: 0.1pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | margin-right: 12.1pt |
| | } |
| | |
| | .c13 { |
| | margin-left: 43pt; |
| | padding-top: 0.1pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | margin-right: 8.9pt |
| | } |
| | |
| | .c3 { |
| | color: #000000; |
| | font-weight: 700; |
| | text-decoration: none; |
| | vertical-align: baseline; |
| | font-size: 12pt; |
| | font-family: "Times New Roman"; |
| | font-style: normal |
| | } |
| | .c3-big { |
| | color: #000000; |
| | font-weight: 700; |
| | text-decoration: none; |
| | vertical-align: baseline; |
| | font-size: 15pt; |
| | font-family: "Times New Roman"; |
| | font-style: normal |
| | } |
| | |
| | .c37 { |
| | margin-left: 43pt; |
| | padding-top: 3.8pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | margin-right: 3.4pt |
| | } |
| | |
| | .c68 { |
| | margin-left: 7pt; |
| | padding-top: 13.9pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: justify; |
| | margin-right: 18.8pt |
| | } |
| | |
| | .c39 { |
| | margin-left: 79pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.1666666666666667; |
| | padding-left: -0pt; |
| | text-align: left |
| | } |
| | |
| | .c26 { |
| | margin-left: 25.1pt; |
| | padding-top: 0.1pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: 2.1pt; |
| | text-align: left |
| | } |
| | |
| | .c23 { |
| | margin-left: 43pt; |
| | padding-top: 0.1pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: 0pt; |
| | text-align: left |
| | } |
| | |
| | .c0 { |
| | margin-left: 43pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: 0pt; |
| | text-align: left |
| | } |
| | |
| | .c58 { |
| | margin-left: 11.7pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: center; |
| | margin-right: -2.9pt |
| | } |
| | |
| | .c34 { |
| | margin-left: 25.1pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: -4.6pt; |
| | text-align: left |
| | } |
| | |
| | .c82 { |
| | margin-left: 79pt; |
| | padding-top: 0.3pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 0.9666666666666667; |
| | text-align: left |
| | } |
| | |
| | .c50 { |
| | margin-left: 25.1pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: 2.3pt; |
| | text-align: left |
| | } |
| | |
| | .c24 { |
| | margin-left: 25.1pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: -6pt; |
| | text-align: left |
| | } |
| | |
| | .c67 { |
| | margin-left: 43pt; |
| | padding-top: 0pt; |
| | padding-left: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: justify |
| | } |
| | |
| | .c11 { |
| | margin-left: 25.1pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | padding-left: 2.1pt; |
| | text-align: left |
| | } |
| | |
| | .c36 { |
| | margin-left: 7pt; |
| | padding-top: 3.8pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | .c70 { |
| | margin-left: 7pt; |
| | padding-top: 14pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: justify |
| | } |
| | |
| | .c18 { |
| | margin-left: 7pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | .c9 { |
| | padding-top: 0.1pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | height: 11pt |
| | } |
| | |
| | .c15 { |
| | margin-left: 7pt; |
| | padding-top: 14pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | .c33 { |
| | margin-left: 79pt; |
| | padding-top: 0pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.1333333333333333; |
| | text-align: left |
| | } |
| | |
| | .c7 { |
| | padding-top: 0.3pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | height: 11pt |
| | } |
| | |
| | .c2 { |
| | padding-top: 0.2pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | height: 11pt |
| | } |
| | |
| | .c22 { |
| | margin-left: 7pt; |
| | padding-top: 13.9pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | .c53 { |
| | padding-top: 2.6pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | height: 11pt |
| | } |
| | |
| | .c49 { |
| | padding-top: 5.8pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left; |
| | height: 11pt |
| | } |
| | |
| | .c14 { |
| | margin-left: 7pt; |
| | padding-top: 0.1pt; |
| | padding-bottom: 0pt; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | .c65 { |
| | background-color: #ffffff; |
| | padding: 72pt 72pt 72pt 72pt |
| | } |
| | |
| | .c61 { |
| | font-size: 12pt; |
| | font-weight: 700 |
| | } |
| | |
| | .c77 { |
| | list-style-position: inside; |
| | text-indent: 45pt |
| | } |
| | |
| | .c5 { |
| | padding: 0; |
| | margin: 0 |
| | } |
| | |
| | .c28 { |
| | margin-right: 29.9pt |
| | } |
| | |
| | .c29 { |
| | margin-right: 33pt |
| | } |
| | |
| | .c35 { |
| | margin-right: 15.1pt |
| | } |
| | |
| | .c83 { |
| | margin-right: 13.3pt |
| | } |
| | |
| | .c80 { |
| | margin-right: 11.8pt |
| | } |
| | |
| | .c48 { |
| | margin-right: 10.9pt |
| | } |
| | |
| | .c21 { |
| | margin-right: 53.4pt |
| | } |
| | |
| | .c59 { |
| | margin-right: 17.8pt |
| | } |
| | |
| | .c16 { |
| | margin-right: 31pt |
| | } |
| | |
| | .c81 { |
| | margin-right: 20.4pt |
| | } |
| | |
| | .c71 { |
| | margin-right: 30.1pt |
| | } |
| | |
| | .c44 { |
| | margin-right: 25.6pt |
| | } |
| | |
| | .c72 { |
| | margin-right: 4.5pt |
| | } |
| | |
| | .c75 { |
| | margin-right: 17pt |
| | } |
| | |
| | .c64 { |
| | margin-right: 1.9pt |
| | } |
| | |
| | .c46 { |
| | margin-right: 6.2pt |
| | } |
| | |
| | .c62 { |
| | margin-right: 1.3pt |
| | } |
| | |
| | .c4 { |
| | margin-right: 18.9pt |
| | } |
| | |
| | .c55 { |
| | margin-right: 36.2pt |
| | } |
| | |
| | .c57 { |
| | margin-right: 7.8pt |
| | } |
| | |
| | .c42 { |
| | margin-right: 3.4pt |
| | } |
| | |
| | .c32 { |
| | margin-right: 19.1pt |
| | } |
| | |
| | .c41 { |
| | margin-right: 7.2pt |
| | } |
| | |
| | .c56 { |
| | margin-right: 8.2pt |
| | } |
| | |
| | .c69 { |
| | margin-right: 0.6pt |
| | } |
| | |
| | .c27 { |
| | margin-right: 4.2pt |
| | } |
| | |
| | .c43 { |
| | margin-right: 38.4pt |
| | } |
| | |
| | .c38 { |
| | margin-right: 5pt |
| | } |
| | |
| | .c40 { |
| | margin-right: 38.9pt |
| | } |
| | |
| | .c60 { |
| | margin-right: 45.6pt |
| | } |
| | |
| | .c31 { |
| | margin-right: 5.8pt |
| | } |
| | |
| | .c76 { |
| | margin-right: 10.2pt |
| | } |
| | |
| | .c52 { |
| | margin-right: 13.1pt |
| | } |
| | |
| | .c66 { |
| | margin-right: 9.7pt |
| | } |
| | |
| | .c63 { |
| | margin-right: 7.4pt |
| | } |
| | |
| | .c85 { |
| | margin-right: 1pt |
| | } |
| | |
| | .c84 { |
| | margin-right: 55.5pt |
| | } |
| | |
| | .title { |
| | padding-top: 24pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 36pt; |
| | padding-bottom: 6pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | page-break-after: avoid; |
| | text-align: left |
| | } |
| | |
| | .subtitle { |
| | padding-top: 18pt; |
| | color: #666666; |
| | font-size: 24pt; |
| | padding-bottom: 4pt; |
| | font-family: "Georgia"; |
| | line-height: 1.0; |
| | page-break-after: avoid; |
| | font-style: italic; |
| | text-align: left |
| | } |
| | |
| | li { |
| | color: #000000; |
| | font-size: 11pt; |
| | font-family: "Times New Roman" |
| | } |
| | |
| | p { |
| | margin: 0; |
| | color: #000000; |
| | font-size: 11pt; |
| | font-family: "Times New Roman" |
| | } |
| | |
| | h1 { |
| | padding-top: 0pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 13.5pt; |
| | padding-bottom: 0pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | h2 { |
| | padding-top: 0pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 12pt; |
| | padding-bottom: 0pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | text-align: left |
| | } |
| | |
| | h3 { |
| | padding-top: 14pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 14pt; |
| | padding-bottom: 4pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | page-break-after: avoid; |
| | text-align: left |
| | } |
| | |
| | h4 { |
| | padding-top: 12pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 12pt; |
| | padding-bottom: 2pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | page-break-after: avoid; |
| | text-align: left |
| | } |
| | |
| | h5 { |
| | padding-top: 11pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 11pt; |
| | padding-bottom: 2pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | page-break-after: avoid; |
| | text-align: left |
| | } |
| | |
| | h6 { |
| | padding-top: 10pt; |
| | color: #000000; |
| | font-weight: 700; |
| | font-size: 10pt; |
| | padding-bottom: 2pt; |
| | font-family: "Times New Roman"; |
| | line-height: 1.0; |
| | page-break-after: avoid; |
| | text-align: left |
| | } |
| | </style> |
| | </head> |
| |
|
| | <body class="c65 doc-content"> |
| | <h2 class="c36"><span class="c3-big">Evaluating Generalist LLMs on Legal Knowledge Tasks: A Comprehensive |
| | Benchmark Integrating Bloom’s Taxonomy on IT Regulations</span></h2> |
| | <p class="c2"><span class="c3"></span></p> |
| | <p class="c18 c72"><span class="c3">Abstract: </span><span class="c1">This paper introduces a comprehensive |
| | benchmark designed to rigorously evaluate general-purpose Large Language Models (LLMs) within legal |
| | contexts, with a particular emphasis on regulatory compliance in Information Technology (IT). By employing |
| | Bloom’s taxonomy as a structural framework, we systematically organize evaluation tasks across |
| | cognitive dimensions—memorization, understanding, analyzing, applying, evaluating, and creating. This |
| | benchmark features a meticulously constructed dataset containing multiple-choice questions (MCQs) and |
| | innovative open-ended analytical tasks tailored for legal analysis. Furthermore, we outline the creation of |
| | an associated training dataset using this structured approach to improve the practical performance of LLMs, |
| | initially focusing on IT regulations and subsequently extending to broader regulatory domains.</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0 start" start="1"> |
| | <li class="c24 li-bullet-0"><span class="c3">Introduction: </span><span class="c1">Effective legal reasoning and |
| | regulatory compliance are essential yet cognitively demanding areas where precision and comprehensive |
| | understanding are paramount. Existing benchmarks, such as LEXam (2025), LegalBench (2023), LLeQA (2023), |
| | and LexEval (2024), have contributed significantly to assessing legal reasoning capabilities but |
| | typically lack detailed cognitive structuring or domain-specific regulatory contexts. Historically, |
| | Bloom’s taxonomy has been instrumental in structuring cognitive assessments, providing a |
| | systematic framework to evaluate a range of cognitive tasks essential for nuanced legal reasoning. |
| | Extending this cognitive framework explicitly into IT regulatory scenarios addresses existing gaps by |
| | providing a detailed and structured assessment method specifically tailored for evaluating generalist |
| | LLM capabilities.</span></li> |
| | </ol> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="2"> |
| | <li class="c24 li-bullet-0"><span class="c3">Related Work: </span><span class="c1">Previous influential |
| | benchmarks include:</span></li> |
| | </ol> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_4-0 start"> |
| | <li class="c0 c55 li-bullet-1"><span class="c3">LEXam (2025)</span><span class="c1">: Focused on traditional |
| | legal examination formats emphasizing cognitive recall and comprehension through MCQs and open-ended |
| | questions.</span></li> |
| | <li class="c0 c64 li-bullet-1"><span class="c3">LegalBench (2023)</span><span class="c1">: Provided in-depth |
| | evaluations through varied legal analytical and practical application scenarios.</span></li> |
| | <li class="c0 c46 li-bullet-1"><span class="c3">LLeQA (2023)</span><span class="c1">: Specialized in assessing |
| | textual coherence and detailed interpretative reasoning.</span></li> |
| | <li class="c0 c32 li-bullet-1"><span class="c3">LexEval (2024)</span><span class="c1">: Evaluated analytical |
| | proficiency extensively within Chinese legal contexts.</span></li> |
| | </ul> |
| | <p class="c9"><span class="c1"></span></p> |
| | <p class="c18 c76"><span class="c1">Our benchmark synthesizes these approaches and introduces specialized IT |
| | regulatory tasks, effectively combining cognitive rigor with practical regulatory relevance.</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="3"> |
| | <li class="c24 li-bullet-0"> |
| | <h2 style="display:inline"><span class="c3">Dataset Construction:</span></h2> |
| | </li> |
| | </ol> |
| | <p class="c2"><span class="c3"></span></p> |
| | <p class="c18"><span class="c1">Our dataset integrates Bloom’s taxonomy, meticulously structured to evaluate |
| | various cognitive abilities within legal contexts, especially focusing on Information Technology (IT) |
| | regulations. The dataset comprises three principal question types designed to thoroughly evaluate LLM |
| | capabilities:</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1 start" start="1"> |
| | <li class="c26 li-bullet-2"> |
| | <h1 style="display:inline"><span class="c6">Multiple-Choice Questions (MCQs):</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c36"><span class="c1">MCQs are carefully crafted to assess foundational cognitive skills such as |
| | memorization and basic comprehension of legal texts and regulatory standards. Questions explicitly target |
| | recall accuracy, terminology clarity, and immediate interpretive abilities.</span></p> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2 start"> |
| | <li class="c0 c62 li-bullet-1"><span class="c3">Principle: </span><span class="c1">MCQs assess precise recall |
| | and basic understanding of legal provisions and regulatory terms.</span></li> |
| | <li class="c0 li-bullet-1"><span class="c3">Metrics: </span><span class="c1">Accuracy rate, response |
| | consistency, and recall precision.</span></li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="2"> |
| | <li class="c11 li-bullet-3"> |
| | <h1 style="display:inline"><span class="c6">General Open-Ended Questions:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c15 c69"><span class="c1">These tasks require LLMs to demonstrate intermediate cognitive skills, including |
| | interpretation, analysis, and application of broader legal and regulatory principles. General open-ended |
| | questions typically present scenarios without sector-specific constraints, assessing the model’s |
| | ability to reason and articulate legal arguments clearly and accurately.</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c23 li-bullet-1"><span class="c3">Principle: </span><span class="c1">Assess analytical depth, |
| | interpretative coherence, and application accuracy.</span></li> |
| | <li class="c0 c52 li-bullet-1"><span class="c3">Metrics: </span><span class="c1">Precision of legal reasoning, |
| | interpretative coherence (BLEU and ROUGE scores), and qualitative evaluation by legal experts.</span> |
| | </li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="3"> |
| | <li class="c11 li-bullet-4"> |
| | <h1 style="display:inline"><span class="c6">Domain-Specific Analytical Questions:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c22"><span class="c1">These tasks represent the core of our innovative dataset, structured to rigorously |
| | evaluate advanced cognitive skills such as deep analysis, critical evaluation, and creative synthesis within |
| | specific regulatory contexts (initially IT regulation). Each task explicitly aligns with Bloom’s |
| | taxonomy:</span></p> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 c43 li-bullet-1"><span class="c3">Analyzing: </span><span class="c1">Dissect complex regulatory |
| | scenarios and identify critical legal issues, compliance requirements, and their implications.</span></li> |
| | <li class="c0 c44 li-bullet-1"><span class="c3">Applying: </span><span class="c1">Direct application of legal |
| | frameworks to practical scenarios, ensuring regulatory adherence.</span></li> |
| | <li class="c0 c4 li-bullet-1"><span class="c3">Evaluating: </span><span class="c1">Critical assessment tasks |
| | involving analysis of legal jurisdictions, risk assessments, and forecasting legal outcomes.</span></li> |
| | <li class="c0 c16 li-bullet-1"><span class="c3">Creating: </span><span class="c1">Generative tasks requiring the |
| | development of innovative compliance strategies and detailed policy recommendations.</span></li> |
| | </ul> |
| | <p class="c9"><span class="c1"></span></p> |
| | <p class="c18"><span class="c1">Example Task:</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 c41 li-bullet-1"><span class="c3">Context: </span><span class="c1">AI CNIL v. Doctissimo (2023): |
| | CNIL sanctions Doctissimo for breaches of GDPR through online symptom checker algorithms.</span></li> |
| | <li class="c23 c66 li-bullet-1"><span class="c3">Facts: </span><span class="c1">Doctissimo operated an online |
| | symptom checker processing users' health data without explicit consent, breaching data minimization |
| | principles, and retaining sensitive data excessively.</span></li> |
| | <li class="c0 li-bullet-1"><span class="c3">Legal Rules: </span><span class="c1">Articles 5, 6, and 9 GDPR; EU |
| | AI Act Recital 38 and Article 10.</span></li> |
| | <li class="c0 c75 li-bullet-1"><span class="c3">Expected Conditions: </span><span class="c1">Explicit consent; |
| | appropriate data retention; transparency in automated decision-making.</span></li> |
| | <li class="c0 c42 li-bullet-1"><span class="c3">Application Explanation: </span><span class="c1">CNIL identified |
| | violations due to the lack of explicit consent, insufficient user information, and excessive data |
| | retention, highlighting high-risk implications under AI regulatory standards.</span></li> |
| | <li class="c0 c21 li-bullet-1"><span class="c3">Expected Result: </span><span class="c1">A fine and mandated |
| | corrective measures for compliance improvement.</span></li> |
| | <li class="c47 li-bullet-5"><span class="c3">Case Reference: </span><span class="c1">CNIL Sanction Decision No. |
| | SAN-2023-001, Doctissimo, January 2023.</span></li> |
| | <li class="c23 c83 li-bullet-1"><span class="c3">Counterfactual Scenario: </span><span class="c1">Implementation |
| | of anonymized data processing methods and metadata collection only.</span></li> |
| | <li class="c0 c29 li-bullet-5"><span class="c3">New Fact: </span><span class="c1">Redesigned system |
| | incorporating anonymized user behavior logging without identifiable data.</span></li> |
| | <li class="c0 c31 li-bullet-1"><span class="c3">Expected Outcome: </span><span class="c1">Compliance with GDPR |
| | and lower risk categorization under the AI Act.</span></li> |
| | <li class="c0 li-bullet-1"><span class="c3">Arguments Pro: </span><span class="c1">Promotes digital health |
| | innovation and rapid diagnostic access for users.</span></li> |
| | <li class="c0 li-bullet-5"><span class="c3">Arguments Contra: </span><span class="c1">Privacy violations, |
| | consent breaches, and lack of transparency.</span></li> |
| | <li class="c0 c84 li-bullet-1"><span class="c3">Paraphrases: </span><span class="c1">Automated health data |
| | processing requires explicit consent, transparency, and strict adherence to GDPR and AI regulatory |
| | frameworks.</span></li> |
| | </ul> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="4"> |
| | <li class="c11 li-bullet-4"> |
| | <h1 style="display:inline"><span class="c6">Scoring Methodology and Metrics:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c22"><span class="c1">Each task type employs specific scoring methodologies and metrics:</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 li-bullet-5"><span class="c3">MCQs: </span><span class="c1">Objective accuracy and recall.</span> |
| | </li> |
| | <li class="c0 c27 li-bullet-1"><span class="c3">General Open-Ended Questions: </span><span |
| | class="c1">Qualitative expert reviews, coherence metrics, and interpretative accuracy.</span></li> |
| | <li class="c0 c80 li-bullet-5"><span class="c3">Domain-Specific Analytical Questions: </span><span |
| | class="c1">Multi-dimensional assessments combining legal accuracy, interpretive depth, cognitive rigor, |
| | and practical applicability scores from experts.</span></li> |
| | </ul> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="4"> |
| | <li class="c24 li-bullet-0"> |
| | <h2 style="display:inline"><span class="c3">Experimental Setup:</span></h2> |
| | </li> |
| | </ol> |
| | <p class="c2"><span class="c3"></span></p> |
| | <p class="c18"><span class="c1">Our experimental setup adopts a rigorous evaluation framework that combines |
| | customized metrics aligned explicitly with task types and cognitive levels defined by Bloom’s |
| | taxonomy. Additionally, we incorporate metrics specific to the inherent nature of AI computational tasks, |
| | namely Generation, Extraction, Single-Label Classification (SLC), and Multi-Label Classification |
| | (MLC).</span></p> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="5"> |
| | <li class="c11 li-bullet-3"> |
| | <h1 style="display:inline"><span class="c6">Customized Metrics by Task Type and Cognitive Level:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c22"><span class="c1">To effectively evaluate the LLM performance across diverse legal cognitive tasks, we |
| | introduce metrics tailored specifically to each cognitive dimension and task format:</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 li-bullet-1"> |
| | <h2 style="display:inline"><span class="c3">Memorization (MCQs):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c39 li-bullet-6"><span class="c3">Accuracy: </span><span class="c1">Percentage of correctly answered |
| | MCQs.</span></li> |
| | <li class="c78 li-bullet-1"><span class="c3">Precision and Recall: </span><span class="c1">Assessed for |
| | questions involving terminological and definitional clarity.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c23 li-bullet-1"> |
| | <h2 style="display:inline"><span class="c3">Understanding (General Open-Ended Questions):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c75 c82 li-bullet-1"><span class="c3">BLEU and ROUGE Scores: </span><span class="c1">Evaluate the |
| | coherence and fluency of textual responses.</span></li> |
| | <li class="c10 li-bullet-1"><span class="c3">Legal Concept Accuracy: </span><span class="c1">Expert-validated |
| | accuracy in interpreting and expressing legal principles.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c23 li-bullet-5"> |
| | <h2 style="display:inline"><span class="c3">Analyzing and Applying (Domain-Specific Analytical |
| | Tasks):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c73 li-bullet-1"><span class="c3">Analytical Depth Score: </span><span class="c1">Expert-graded scale |
| | assessing depth of legal analysis.</span></li> |
| | <li class="c45 li-bullet-5"><span class="c3">Compliance Application Accuracy: </span><span class="c1">Percentage |
| | accuracy based on correctly identified and applied regulatory criteria.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c23 li-bullet-6"> |
| | <h2 style="display:inline"><span class="c3">Evaluating and Creating (Advanced Domain-Specific Tasks):</span> |
| | </h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c39 li-bullet-6"><span class="c3">Risk Assessment Accuracy: </span><span class="c1">Evaluates the |
| | model’s ability to correctly identify and assess compliance risks.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3"> |
| | <li class="c20 li-bullet-1"><span class="c3">Creativity and Practicality Score: </span><span class="c1">Expert |
| | qualitative assessment rating innovative legal solutions and strategies.</span></li> |
| | </ul> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="6"> |
| | <li class="c11 li-bullet-4"> |
| | <h1 style="display:inline"><span class="c6">Metrics Derived by AI Task Nature:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c32 c70"><span class="c1">To further refine evaluations, we incorporate metrics categorized according to |
| | the intrinsic nature of the AI computations involved in each task:</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 li-bullet-1"> |
| | <h2 style="display:inline"><span class="c3">Generation Tasks (e.g., Creating):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c25 li-bullet-5"><span class="c3">Coherence and Fluency (BLEU, ROUGE): </span><span |
| | class="c1">Measures readability and semantic coherence.</span></li> |
| | <li class="c54 li-bullet-1"><span class="c3">Novelty Index: </span><span class="c1">Expert-assessed originality |
| | and innovation in generated responses.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c23 li-bullet-1"> |
| | <h2 style="display:inline"><span class="c3">Extraction Tasks (e.g., Understanding, Analyzing):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c30 li-bullet-5"><span class="c3">Extraction Precision and Recall: </span><span |
| | class="c1">Quantitative metrics assessing the model's capability to accurately identify and extract |
| | relevant legal facts, principles, and issues from textual scenarios.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 c35 li-bullet-1"> |
| | <h2 style="display:inline"><span class="c3">Single-Label Classification (SLC) Tasks (MCQs, simple regulatory |
| | compliance checks):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c39 li-bullet-1"><span class="c3">Accuracy Rate: </span><span class="c1">Proportion of correct |
| | single-classification responses.</span></li> |
| | <li class="c79 li-bullet-1"><span class="c3">Confidence Score Distribution: </span><span class="c1">Statistical |
| | analysis of model confidence levels for correct vs. incorrect classifications.</span></li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c17 li-bullet-5"> |
| | <h2 style="display:inline"><span class="c3">Multi-Label Classification (MLC) Tasks (complex regulatory |
| | assessments, compliance verification tasks):</span></h2> |
| | </li> |
| | </ul> |
| | <ul class="c5 lst-kix_list_5-3 start"> |
| | <li class="c51 li-bullet-5"><span class="c3">Multi-label Accuracy (Subset Accuracy): </span><span |
| | class="c1">Evaluates the precision in identifying multiple applicable legal criteria |
| | simultaneously.</span></li> |
| | <li class="c12 li-bullet-5"><span class="c3">F1-Score (Macro and Micro): </span><span class="c1">Assesses |
| | balanced precision and recall across multiple regulatory criteria.</span></li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="7"> |
| | <li class="c11 li-bullet-2"> |
| | <h1 style="display:inline"><span class="c6">Scoring and Expert Review Methodology:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c68"><span class="c1">All quantitative metrics are complemented by qualitative expert evaluations. Legal |
| | experts systematically review a representative sample of generated outputs, scoring them based on accuracy, |
| | interpretive validity, regulatory relevance, and practical applicability.</span></p> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 c60 li-bullet-1"><span class="c3">Expert Validity Rating: </span><span class="c1">Scale of |
| | 1–5 assessing overall legal correctness and applicability.</span></li> |
| | <li class="c0 c28 li-bullet-1"><span class="c3">Regulatory Compliance Score: </span><span |
| | class="c1">Expert-rated accuracy of models in identifying, evaluating, and applying regulatory criteria |
| | correctly.</span></li> |
| | </ul> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="8"> |
| | <li class="c50 li-bullet-7"> |
| | <h1 style="display:inline"><span class="c6">Tooling and Technical Considerations:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c22"><span class="c1">In addition to benchmark-level assessments, our SaaS platform “Jessica” |
| | by Contractzlab was employed to support human annotation and evaluation. Jessica integrates a compliance |
| | scoring engine tailored to regulatory assessment logic. This system enables precise |
| | scoring of model responses against predefined rules and compliance thresholds, including:</span></p> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 li-bullet-1"><span class="c1">Legal principle matching.</span></li> |
| | <li class="c0 li-bullet-5"><span class="c1">Alignment with sector-specific obligations.</span></li> |
| | <li class="c0 li-bullet-1"><span class="c1">Scoring based on automated legal heuristics.</span></li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <p class="c14"><span class="c61">Computational Efficiency: </span><span class="c1">Average inference time per |
| | question type/task nature.</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <p class="c18"><span class="c61">Scalability and Generalizability: </span><span class="c1">Assessed through task |
| | performance consistency across multiple regulatory domains and contexts.</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <p class="c18"><span class="c1">This comprehensive experimental setup, integrating tailored cognitive and |
| | computational metrics, ensures a nuanced, robust, and practical evaluation of generalist LLM performance in |
| | legal and regulatory tasks, providing clear guidance for model improvements and practical |
| | deployments.</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="5"> |
| | <li class="c24 li-bullet-0"> |
| | <h2 style="display:inline"><span class="c3">Results and Analysis:</span></h2> |
| | </li> |
| | </ol> |
| | <p class="c9"><span class="c3"></span></p> |
| | <p class="c18"><span class="c1">We conducted an exhaustive evaluation of multiple general-purpose Large Language |
| | Models (LLMs), including DeepSeek, Llama-4-Maverick, Mistral, Phi-4, GPT-4.1, and GPT-4o, across a diverse |
| | set of legal tasks categorized by Bloom’s taxonomy. The results illustrate substantial variability in |
| | performance, underscoring strengths and limitations inherent to each model.</span></p> |
| | <p class="c49"><span class="c8"></span></p> |
| | <p class="c58"><span |
| | style="overflow: hidden; display: inline-block; margin: 0.00px 0.00px; border: 0.00px solid #000000; transform: rotate(0.00rad) translateZ(0px); -webkit-transform: rotate(0.00rad) translateZ(0px); width: 709.49px; height: 436.60px;"><img |
| | alt="Chart of benchmark results for the evaluated models across legal tasks" src="images/image1.jpg" |
| | style="width: 709.49px; height: 436.60px; margin-left: 0.00px; margin-top: 0.00px; transform: rotate(0.00rad) translateZ(0px); -webkit-transform: rotate(0.00rad) translateZ(0px);" |
| | title=""></span></p> |
| | <p class="c53"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="9"> |
| | <li class="c11 li-bullet-2"> |
| | <h1 style="display:inline"><span class="c6">Quantitative Performance Overview</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c22"><span class="c1">Models exhibited distinct performance patterns:</span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c37 li-bullet-6"><span class="c3">Argument Mining (Accuracy)</span><span class="c1">: Mistral (96.9%) |
| | and Phi-4 (66.4%) significantly outperformed other models. Conversely, GPT-4.1 (1.3%) exhibited poor |
| | performance, indicating limitations in its capability to effectively extract nuanced arguments.</span> |
| | </li> |
| | <li class="c23 c56 li-bullet-5"><span class="c3">Article Recitation (ROUGE-L)</span><span class="c1">: GPT-4.1 |
| | (45.9%) demonstrated superior capability in generating coherent textual content. GPT-4o and |
| | Llama-4-Maverick also showed commendable performances, whereas DeepSeek and Phi-4 displayed limitations, |
| | suggesting challenges with complex textual synthesis.</span></li> |
| | <li class="c0 c63 li-bullet-1"><span class="c3">Consultation (ROUGE-L)</span><span class="c1">: Llama-4-Maverick |
| | (26.1%) and GPT-4o (24.5%) achieved relatively higher coherence, though overall performance across |
| | models remained limited, pointing toward inherent complexities and contextual nuances involved in |
| | consultative legal tasks.</span></li> |
| | </ul> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="10"> |
| | <li class="c11 li-bullet-4"> |
| | <h1 style="display:inline"><span class="c6">Cognitive Level-Specific Insights</span></h1> |
| | </li> |
| | </ol> |
| | <ul class="c5 lst-kix_list_5-2 start"> |
| | <li class="c74 li-bullet-1"><span class="c3">Memorization &amp; Understanding</span><span class="c1">: GPT |
| | variants excelled in foundational cognitive tasks requiring recall and basic comprehension, reflecting |
| | robust pre-training on extensive factual datasets.</span></li> |
| | <li class="c19 li-bullet-1"><span class="c3">Analyzing &amp; Applying</span><span class="c1">: Mistral and Phi-4 |
| | excelled in complex analytical scenarios, demonstrating advanced capabilities in dissecting regulatory |
| | contexts and applying specific compliance frameworks accurately.</span></li> |
| | <li class="c0 c57 li-bullet-1"><span class="c3">Evaluating &amp; Creating</span><span class="c1">: GPT-4o and |
| | GPT-4.1 displayed notable strengths in advanced evaluative and generative tasks, adeptly crafting |
| | contextually relevant legal arguments and strategic recommendations, albeit sometimes lacking detailed |
| | specificity required by highly specialized regulatory contexts.</span></li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="11"> |
| | <li class="c11 li-bullet-3"> |
| | <h1 style="display:inline"><span class="c6">Detailed Comparative Analysis</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c15"><span class="c1">Quantitative analyses revealed nuanced insights into model performance:</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c0 c71 li-bullet-5"><span class="c3">Generation Tasks</span><span class="c1">: GPT-4o demonstrated |
| | consistent superiority in generative capabilities, showcasing both fluency and contextual coherence, as |
| | evidenced by ROUGE-L scores.</span></li> |
| | <li class="c0 c40 li-bullet-6"><span class="c3">Extraction and Classification Tasks</span><span class="c1">: |
| | Models like Mistral and Phi-4 excelled, particularly in tasks requiring precise extraction and accurate |
| | classification of regulatory details, suggesting their suitability for structured analytical legal |
| | applications.</span></li> |
| | </ul> |
| | <p class="c7"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-1" start="12"> |
| | <li class="c11 li-bullet-2"> |
| | <h1 style="display:inline"><span class="c6">Model Limitations and Performance Gaps</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c15"><span class="c1">Despite strengths, substantial limitations emerged across models:</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_5-2"> |
| | <li class="c23 c48 li-bullet-1"><span class="c3">Generalist vs. Domain-Specific Contexts</span><span |
| | class="c1">: All models, especially GPT-4.1, showed reduced efficacy in domain-specific tasks, |
| | reflecting the intrinsic limitations of generalist models in highly specialized regulatory |
| | scenarios.</span></li> |
| | <li class="c0 c59 li-bullet-5"><span class="c3">Complex Legal Reasoning</span><span class="c1">: None of the |
| | models consistently achieved high performance across all cognitive levels, indicating the need for more |
| | specialized training and refinement to effectively handle nuanced legal analyses and regulatory |
| | compliance tasks.</span></li> |
| | </ul> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="6"> |
| | <li class="c34 li-bullet-8"> |
| | <h1 style="display:inline"><span class="c6">Expert Validation and Qualitative Assessment:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c36"><span class="c1">Legal expert assessments validated quantitative findings, providing deeper |
| | insights:</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_3-0 start"> |
| | <li class="c13 li-bullet-1"><span class="c1">GPT variants received recognition for general clarity and logical |
| | coherence but faced criticism for superficial handling of domain-specific regulatory nuances.</span> |
| | </li> |
| | <li class="c0 c81 li-bullet-1"><span class="c1">Mistral and Phi-4 garnered positive feedback for analytical |
| | rigor and precision, yet experts noted occasional difficulties in contextual interpretation and |
| | higher-order reasoning.</span></li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <p class="c14 c31"><span class="c1">Expert evaluations also highlighted the practical implications of model |
| | performance limitations, emphasizing the importance of context-specific accuracy, interpretative depth, and |
| | compliance relevance.</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="7"> |
| | <li class="c34 li-bullet-9"> |
| | <h1 style="display:inline"><span class="c6">Discussion and Future Directions:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c15"><span class="c1">The detailed analyses underscore clear pathways for further research and model |
| | improvement:</span></p> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ul class="c5 lst-kix_list_2-0 start"> |
| | <li class="c0 c85 li-bullet-1"><span class="c3">Domain-Specific Fine-Tuning</span><span class="c1">: Targeted |
| | fine-tuning on specific regulatory datasets is crucial for enhancing model performance, especially in |
| | specialized contexts.</span></li> |
| | <li class="c23 c38 li-bullet-1"><span class="c3">Hybrid Methodologies</span><span class="c1">: Integrating |
| | rule-based compliance frameworks with LLM outputs can substantially enhance accuracy and reliability, |
| | particularly in compliance-heavy scenarios.</span></li> |
| | <li class="c64 c67 li-bullet-5"><span class="c3">Expansion of Benchmarks</span><span class="c1">: Extending |
| | evaluations into other regulated domains such as healthcare, finance, and environmental law will provide |
| | broader insights into model generalizability and robustness.</span></li> |
| | </ul> |
| | <p class="c2"><span class="c1"></span></p> |
| | <ol class="c5 lst-kix_list_5-0" start="8"> |
| | <li class="c34 li-bullet-9"> |
| | <h1 style="display:inline"><span class="c6">Conclusion:</span></h1> |
| | </li> |
| | </ol> |
| | <p class="c15"><span class="c1">This comprehensive benchmarking exercise rigorously assessed multiple |
| | general-purpose Large Language Models across a structured array of legal cognitive tasks using Bloom’s |
| | taxonomy. Our results underscore both substantial strengths and pronounced limitations of current LLMs, |
| | particularly their struggles with nuanced, domain-specific regulatory reasoning.</span></p> |
| | <p class="c18"><span class="c1">Recognizing these limitations, we are currently advancing our proprietary |
| | model, Mike, with a dedicated and systematic training regimen explicitly structured around Bloom’s |
| | taxonomy and specialized regulatory compliance datasets. This targeted approach positions Mike to overcome |
| | many of the identified limitations, potentially enabling it to deliver advanced legal reasoning, precise |
| | regulatory compliance analyses, and practical solutions tailored to specific industry contexts.</span></p> |
| | <p class="c18"><span class="c1">Future developments will focus on enhancing |
| | Mike’s capacity for interpretive depth, nuanced reasoning, and regulatory precision, setting a robust |
| | foundation for its deployment in complex legal and regulatory environments.</span></p> |
| | <p class="c9"><span class="c1"></span></p> |
| | <h2 class="c18"><span class="c3">References:</span></h2> |
| | <p class="c2"><span class="c3"></span></p> |
| | <ul class="c5 lst-kix_list_1-0 start"> |
| | <li class="c0 li-bullet-6"><span class="c1">LEXam (2025), GitHub, HuggingFace.</span></li> |
| | <li class="c0 li-bullet-5"><span class="c1">LegalBench (2023), GitHub, OpenReview.</span></li> |
| | <li class="c0 li-bullet-1"><span class="c1">LLeQA (2023), GitHub, HuggingFace dataset.</span></li> |
| | <li class="c0 li-bullet-5"><span class="c1">LexEval (2024), arXiv, GitHub.</span></li> |
| | </ul> |
| | </body> |
| |
|
| | </html> |