RefalMachine committed on
Commit
434ebcf
·
verified ·
1 Parent(s): f3eb7a5

Upload folder using huggingface_hub

Browse files
llmtf_eval_k5/darumeru_cp_para_en.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k5/darumeru_cp_para_en_total.jsonl CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "task_name": "darumeru/cp_para_en",
3
  "results": {
4
- "symbol_per_token": 4.528028725817485,
5
- "len": 0.9872908812117563,
6
- "lcs": 0.9883058202112522
7
  },
8
- "leaderboard_result": 0.9883058202112522
9
  }
 
1
  {
2
  "task_name": "darumeru/cp_para_en",
3
  "results": {
4
+ "symbol_per_token": 4.5805091480824744,
5
+ "len": 0.9931879393823224,
6
+ "lcs": 1.0
7
  },
8
+ "leaderboard_result": 1.0
9
  }
llmtf_eval_k5/darumeru_cp_para_ru.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k5/darumeru_cp_para_ru_total.jsonl CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "task_name": "darumeru/cp_para_ru",
3
  "results": {
4
- "symbol_per_token": 2.9865072713630245,
5
- "len": 0.989199175688307,
6
- "lcs": 0.9976086956521739
7
  },
8
- "leaderboard_result": 0.9976086956521739
9
  }
 
1
  {
2
  "task_name": "darumeru/cp_para_ru",
3
  "results": {
4
+ "symbol_per_token": 2.8969006110233777,
5
+ "len": 0.9947877222260457,
6
+ "lcs": 1.0
7
  },
8
+ "leaderboard_result": 1.0
9
  }
llmtf_eval_k5/darumeru_cp_sent_en.jsonl CHANGED
@@ -23374,9 +23374,9 @@
23374
  }
23375
  {
23376
  "metric": {
23377
- "symbol_per_token": 4.508474576271187,
23378
- "len": 0.5172413793103449,
23379
- "lcs": 0.35471698113207545
23380
  },
23381
  "predict": {
23382
  "tokens": [
@@ -23425,21 +23425,91 @@
23425
  23,
23426
  4632,
23427
  578,
23428
- 12538,
23429
- 23844,
23430
  574,
 
 
23431
  279,
23432
- 1176,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23433
  4632,
23434
- 311,
23435
- 20547,
23436
- 927,
23437
- 3,
23438
- 16,
23439
- 7239,
23440
- 15603
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23441
  ],
23442
- "text": " Additionally, a large portion of the$854 million in aid offered went uncollected, including over $400 million in oil (almost 50%). Repeat the following text absolutely exactly. Text: The 2008 film The Dark Knight was the first film to gross over$1 billion worldwide"
23443
  },
23444
  "sample": {
23445
  "messages": [
@@ -25119,7 +25189,7 @@
25119
  "metric": {
25120
  "symbol_per_token": 4.532258064516129,
25121
  "len": 0.9838709677419355,
25122
- "lcs": 0.5892857142857143
25123
  },
25124
  "predict": {
25125
  "tokens": [
 
23374
  }
23375
  {
23376
  "metric": {
23377
+ "symbol_per_token": 4.488372093023256,
23378
+ "len": 0.234375,
23379
+ "lcs": 0.0
23380
  },
23381
  "predict": {
23382
  "tokens": [
 
23425
  23,
23426
  4632,
23427
  578,
23428
+ 46037,
23429
+ 1565,
23430
  574,
23431
+ 14948,
23432
+ 555,
23433
  279,
23434
+ 2324,
23435
+ 315,
23436
+ 6721,
23437
+ 83162,
23438
+ 11035,
23439
+ 10470,
23440
+ 13611,
23441
+ 10470,
23442
+ 70907,
23443
+ 13,
23444
+ 45901,
23445
+ 25,
23446
+ 578,
23447
+ 220,
23448
+ 1049,
23449
+ 23,
23450
  4632,
23451
+ 578,
23452
+ 46037,
23453
+ 1565,
23454
+ 574,
23455
+ 14948,
23456
+ 555,
23457
+ 279,
23458
+ 2324,
23459
+ 315,
23460
+ 6721,
23461
+ 83162,
23462
+ 11035,
23463
+ 10470,
23464
+ 13611,
23465
+ 10470,
23466
+ 70907,
23467
+ 13,
23468
+ 45901,
23469
+ 279,
23470
+ 2768,
23471
+ 1495,
23472
+ 11112,
23473
+ 7041,
23474
+ 13,
23475
+ 2991,
23476
+ 25,
23477
+ 578,
23478
+ 220,
23479
+ 1049,
23480
+ 23,
23481
+ 4632,
23482
+ 578,
23483
+ 46037,
23484
+ 1565,
23485
+ 574,
23486
+ 14948,
23487
+ 555,
23488
+ 279,
23489
+ 2324,
23490
+ 315,
23491
+ 6721,
23492
+ 83162,
23493
+ 11035,
23494
+ 10470,
23495
+ 13611,
23496
+ 10470,
23497
+ 70907,
23498
+ 13,
23499
+ 45901,
23500
+ 25,
23501
+ 578,
23502
+ 220,
23503
+ 1049,
23504
+ 23,
23505
+ 4632,
23506
+ 578,
23507
+ 46037,
23508
+ 1565,
23509
+ 574,
23510
+ 14948
23511
  ],
23512
+ "text": " Additionally, a large portion of the$854 million in aid offered went uncollected, including over $400 million in oil (almost 50%). Repeat the following text absolutely exactly. Text: The 2008 film The Wrestler was inspired by the life of professional wrestler Rowdy Roddy Piper. Repeat: The 2008 film The Wrestler was inspired by the life of professional wrestler Rowdy Roddy Piper. Repeat the following text absolutely exactly. Text: The 2008 film The Wrestler was inspired by the life of professional wrestler Rowdy Roddy Piper. Repeat: The 2008 film The Wrestler was inspired"
23513
  },
23514
  "sample": {
23515
  "messages": [
 
25189
  "metric": {
25190
  "symbol_per_token": 4.532258064516129,
25191
  "len": 0.9838709677419355,
25192
+ "lcs": 1.0
25193
  },
25194
  "predict": {
25195
  "tokens": [
llmtf_eval_k5/darumeru_cp_sent_en_total.jsonl CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "task_name": "darumeru/cp_sent_en",
3
  "results": {
4
- "symbol_per_token": 4.556837515131998,
5
- "len": 0.9592170801454492,
6
- "lcs": 0.9978536640150768
7
  },
8
- "leaderboard_result": 0.9592170801454492
9
  }
 
1
  {
2
  "task_name": "darumeru/cp_sent_en",
3
  "results": {
4
+ "symbol_per_token": 4.55679665642621,
5
+ "len": 0.9586421484801841,
6
+ "lcs": 0.9979674796747967
7
  },
8
+ "leaderboard_result": 0.9586421484801841
9
  }
llmtf_eval_k5/darumeru_cp_sent_ru.jsonl CHANGED
@@ -30926,7 +30926,7 @@
30926
  "metric": {
30927
  "symbol_per_token": 3.220779220779221,
30928
  "len": 0.9382716049382716,
30929
- "lcs": 0.9473684210526315
30930
  },
30931
  "predict": {
30932
  "tokens": [
@@ -34617,7 +34617,7 @@
34617
  "metric": {
34618
  "symbol_per_token": 3.0,
34619
  "len": 0.9166666666666666,
34620
- "lcs": 0.9411764705882353
34621
  },
34622
  "predict": {
34623
  "tokens": [
 
30926
  "metric": {
30927
  "symbol_per_token": 3.220779220779221,
30928
  "len": 0.9382716049382716,
30929
+ "lcs": 1.0
30930
  },
30931
  "predict": {
30932
  "tokens": [
 
34617
  "metric": {
34618
  "symbol_per_token": 3.0,
34619
  "len": 0.9166666666666666,
34620
+ "lcs": 0.0
34621
  },
34622
  "predict": {
34623
  "tokens": [
llmtf_eval_k5/darumeru_cp_sent_ru_total.jsonl CHANGED
@@ -3,7 +3,7 @@
3
  "results": {
4
  "symbol_per_token": 2.886186230509937,
5
  "len": 0.9638393987832617,
6
- "lcs": 0.9997711394078869
7
  },
8
  "leaderboard_result": 0.9638393987832617
9
  }
 
3
  "results": {
4
  "symbol_per_token": 2.886186230509937,
5
  "len": 0.9638393987832617,
6
+ "lcs": 0.997946611909651
7
  },
8
  "leaderboard_result": 0.9638393987832617
9
  }
llmtf_eval_k5/evaluation_log.txt CHANGED
@@ -896,3 +896,47 @@ INFO: 2024-07-14 15:10:39,641: llmtf.base.evaluator: Ended eval
896
  INFO: 2024-07-14 15:10:39,662: llmtf.base.evaluator:
897
  mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
898
  0.617 0.245 0.407 0.513 0.770 0.412 0.490 0.106 0.988 0.998 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
896
  INFO: 2024-07-14 15:10:39,662: llmtf.base.evaluator:
897
  mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
898
  0.617 0.245 0.407 0.513 0.770 0.412 0.490 0.106 0.988 0.998 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
899
+ INFO: 2024-08-15 15:13:13,578: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru']
900
+ INFO: 2024-08-15 15:13:13,584: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 198, 271]
901
+ INFO: 2024-08-15 15:13:13,584: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
902
+ INFO: 2024-08-15 15:13:13,783: llmtf.base.evaluator: Starting eval on ['darumeru/cp_para_ru']
903
+ INFO: 2024-08-15 15:13:13,784: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 198, 271]
904
+ INFO: 2024-08-15 15:13:13,784: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
905
+ INFO: 2024-08-15 15:13:13,787: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_en']
906
+ INFO: 2024-08-15 15:13:13,787: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 198, 271]
907
+ INFO: 2024-08-15 15:13:13,787: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
908
+ INFO: 2024-08-15 15:13:14,027: llmtf.base.evaluator: Starting eval on ['darumeru/cp_para_en']
909
+ INFO: 2024-08-15 15:13:14,027: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 198, 271]
910
+ INFO: 2024-08-15 15:13:14,027: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
911
+ INFO: 2024-08-15 15:13:17,231: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 3.45s
912
+ INFO: 2024-08-15 15:13:17,243: llmtf.base.darumeru/cp_para_en: Loading Dataset: 3.22s
913
+ INFO: 2024-08-15 15:13:17,538: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 3.95s
914
+ INFO: 2024-08-15 15:13:17,585: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 3.80s
915
+ INFO: 2024-08-15 15:19:34,325: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 376.74s
916
+ INFO: 2024-08-15 15:19:34,329: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
917
+ INFO: 2024-08-15 15:19:34,352: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.55679665642621, 'len': 0.9586421484801841, 'lcs': 0.9979674796747967}
918
+ INFO: 2024-08-15 15:19:34,355: llmtf.base.evaluator: Ended eval
919
+ INFO: 2024-08-15 15:19:34,393: llmtf.base.evaluator:
920
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
921
+ 0.617 0.245 0.407 0.513 0.770 0.412 0.490 0.106 0.988 0.998 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
922
+ INFO: 2024-08-15 15:19:45,060: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 387.52s
923
+ INFO: 2024-08-15 15:19:45,063: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
924
+ INFO: 2024-08-15 15:19:45,085: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.886186230509937, 'len': 0.9638393987832617, 'lcs': 0.997946611909651}
925
+ INFO: 2024-08-15 15:19:45,087: llmtf.base.evaluator: Ended eval
926
+ INFO: 2024-08-15 15:19:45,096: llmtf.base.evaluator:
927
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
928
+ 0.617 0.245 0.407 0.513 0.770 0.412 0.490 0.106 0.988 0.998 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
929
+ INFO: 2024-08-15 15:25:05,511: llmtf.base.darumeru/cp_para_en: Processing Dataset: 708.27s
930
+ INFO: 2024-08-15 15:25:05,558: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
931
+ INFO: 2024-08-15 15:25:05,628: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.5805091480824744, 'len': 0.9931879393823224, 'lcs': 1.0}
932
+ INFO: 2024-08-15 15:25:05,629: llmtf.base.evaluator: Ended eval
933
+ INFO: 2024-08-15 15:25:05,657: llmtf.base.evaluator:
934
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
935
+ 0.618 0.245 0.407 0.513 0.770 0.412 0.490 0.106 1.000 0.998 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
936
+ INFO: 2024-08-15 15:25:55,376: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 758.14s
937
+ INFO: 2024-08-15 15:25:55,378: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
938
+ INFO: 2024-08-15 15:25:55,389: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.8969006110233777, 'len': 0.9947877222260457, 'lcs': 1.0}
939
+ INFO: 2024-08-15 15:25:55,390: llmtf.base.evaluator: Ended eval
940
+ INFO: 2024-08-15 15:25:55,399: llmtf.base.evaluator:
941
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
942
+ 0.618 0.245 0.407 0.513 0.770 0.412 0.490 0.106 1.000 1.000 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
llmtf_eval_k5/evaluation_results.txt CHANGED
@@ -1,2 +1,2 @@
1
  mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
- 0.617 0.245 0.407 0.513 0.770 0.412 0.490 0.106 0.988 0.998 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542
 
1
  mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.618 0.245 0.407 0.513 0.770 0.412 0.490 0.106 1.000 1.000 0.959 0.964 0.500 0.707 0.421 0.836 0.680 0.566 0.542