fhai50032 commited on
Commit
0ae5516
·
verified ·
1 Parent(s): 9e66d11

Upload 4 files

Browse files
BiBo-Mini-0-99-BoolQ-Hindi.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": -1,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 1541.919794194,
9
+ "end_time": 2653.777581735,
10
+ "total_evaluation_time_secondes": "1111.8577875409999",
11
+ "model_name": "tinycompany/BiBo-Mini-v0.99",
12
+ "model_sha": "9e66d1135c27c915ecf382453909e5bc1f6a650d",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "3.31 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "indiceval|Boolq:hindi|5": {
19
+ "acc": 0.6678899082568808,
20
+ "acc_stderr": 0.00823732150008232
21
+ },
22
+ "all": {
23
+ "acc": 0.6678899082568808,
24
+ "acc_stderr": 0.00823732150008232
25
+ }
26
+ },
27
+ "versions": {
28
+ "indiceval|Boolq:hindi|5": 0
29
+ },
30
+ "config_tasks": {
31
+ "indiceval|Boolq:hindi": {
32
+ "name": "Boolq:hindi",
33
+ "prompt_function": "boolq_harness_indic",
34
+ "hf_repo": "Cognitive-Lab/Indic-BoolQ",
35
+ "hf_subset": "hi",
36
+ "metric": [
37
+ "loglikelihood_acc"
38
+ ],
39
+ "hf_avail_splits": [
40
+ "train",
41
+ "validation"
42
+ ],
43
+ "evaluation_splits": [
44
+ "validation"
45
+ ],
46
+ "few_shots_split": null,
47
+ "few_shots_select": null,
48
+ "generation_size": -1,
49
+ "stop_sequence": [
50
+ "\n"
51
+ ],
52
+ "output_regex": null,
53
+ "frozen": false,
54
+ "suite": [
55
+ "indiceval",
56
+ "leaderboard",
57
+ "superglue"
58
+ ],
59
+ "original_num_docs": 3270,
60
+ "effective_num_docs": 3270,
61
+ "trust_dataset": true,
62
+ "must_remove_duplicate_docs": null
63
+ }
64
+ },
65
+ "summary_tasks": {
66
+ "indiceval|Boolq:hindi|5": {
67
+ "hashes": {
68
+ "hash_examples": "1a2035c16478d97f",
69
+ "hash_full_prompts": "6a700dcb36e00984",
70
+ "hash_input_tokens": "fb4c967f34bf3bc1",
71
+ "hash_cont_tokens": "a329388f38464b5f"
72
+ },
73
+ "truncated": 0,
74
+ "non_truncated": 3270,
75
+ "padded": 6516,
76
+ "non_padded": 24,
77
+ "effective_few_shots": 5.0,
78
+ "num_truncated_few_shots": 0
79
+ }
80
+ },
81
+ "summary_general": {
82
+ "hashes": {
83
+ "hash_examples": "fa93795de2d55c79",
84
+ "hash_full_prompts": "ba6a7917f7ec4138",
85
+ "hash_input_tokens": "acc16b51fd4266e9",
86
+ "hash_cont_tokens": "3aa1443bfadab8ee"
87
+ },
88
+ "truncated": 0,
89
+ "non_truncated": 3270,
90
+ "padded": 6516,
91
+ "non_padded": 24,
92
+ "num_truncated_few_shots": 0
93
+ },
94
+ "email": "fhai50032@gmail.com",
95
+ "language": "hindi"
96
+ }
BiBo-Mini-0-99-HellaSwag-Hindi.json ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": -1,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 3030.218918019,
9
+ "end_time": 6482.604801924,
10
+ "total_evaluation_time_secondes": "3452.385883905",
11
+ "model_name": "tinycompany/BiBo-Mini-v0.99",
12
+ "model_sha": "9e66d1135c27c915ecf382453909e5bc1f6a650d",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "3.31 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "indiceval|Hellaswag:hindi|5": {
19
+ "acc": 0.4033061143198566,
20
+ "acc_stderr": 0.004895586329401321,
21
+ "acc_norm": 0.5071698864767975,
22
+ "acc_norm_stderr": 0.004989268362968727
23
+ },
24
+ "all": {
25
+ "acc": 0.4033061143198566,
26
+ "acc_stderr": 0.004895586329401321,
27
+ "acc_norm": 0.5071698864767975,
28
+ "acc_norm_stderr": 0.004989268362968727
29
+ }
30
+ },
31
+ "versions": {
32
+ "indiceval|Hellaswag:hindi|5": 0
33
+ },
34
+ "config_tasks": {
35
+ "indiceval|Hellaswag:hindi": {
36
+ "name": "Hellaswag:hindi",
37
+ "prompt_function": "hellaswag_harness_indic",
38
+ "hf_repo": "Cognitive-Lab/Indic-Hellaswag",
39
+ "hf_subset": "hi",
40
+ "metric": [
41
+ "loglikelihood_acc",
42
+ "loglikelihood_acc_norm"
43
+ ],
44
+ "hf_avail_splits": [
45
+ "test",
46
+ "validation"
47
+ ],
48
+ "evaluation_splits": [
49
+ "validation"
50
+ ],
51
+ "few_shots_split": null,
52
+ "few_shots_select": "random_sampling_from_train",
53
+ "generation_size": -1,
54
+ "stop_sequence": [
55
+ "\n"
56
+ ],
57
+ "output_regex": null,
58
+ "frozen": false,
59
+ "suite": [
60
+ "indiceval",
61
+ "leaderboard"
62
+ ],
63
+ "original_num_docs": 10042,
64
+ "effective_num_docs": 10042,
65
+ "trust_dataset": true,
66
+ "must_remove_duplicate_docs": null
67
+ }
68
+ },
69
+ "summary_tasks": {
70
+ "indiceval|Hellaswag:hindi|5": {
71
+ "hashes": {
72
+ "hash_examples": "d3879315240469aa",
73
+ "hash_full_prompts": "d317930bde85b05e",
74
+ "hash_input_tokens": "7e36df66d335e66d",
75
+ "hash_cont_tokens": "097c0d229cc1b4f3"
76
+ },
77
+ "truncated": 0,
78
+ "non_truncated": 10042,
79
+ "padded": 40040,
80
+ "non_padded": 128,
81
+ "effective_few_shots": 5.0,
82
+ "num_truncated_few_shots": 0
83
+ }
84
+ },
85
+ "summary_general": {
86
+ "hashes": {
87
+ "hash_examples": "3d62fd95e302ad7c",
88
+ "hash_full_prompts": "06017e5743861187",
89
+ "hash_input_tokens": "f993252db8b2cc1e",
90
+ "hash_cont_tokens": "92ae41037662c6d2"
91
+ },
92
+ "truncated": 0,
93
+ "non_truncated": 10042,
94
+ "padded": 40040,
95
+ "non_padded": 128,
96
+ "num_truncated_few_shots": 0
97
+ },
98
+ "email": "fhai50032@gmail.com",
99
+ "language": "hindi"
100
+ }
BiBo-Mini-0-99-MMLU-Hindi.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": 4,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 481.387435385,
9
+ "end_time": 1498.931283883,
10
+ "total_evaluation_time_secondes": "1017.5438484980001",
11
+ "model_name": "tinycompany/BiBo-Mini-v0.99",
12
+ "model_sha": "9e66d1135c27c915ecf382453909e5bc1f6a650d",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "3.31 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "indiceval|MMLU:hindi|5": {
19
+ "acc": 0.3616293975217206,
20
+ "acc_stderr": 0.004054798562226004
21
+ },
22
+ "all": {
23
+ "acc": 0.3616293975217206,
24
+ "acc_stderr": 0.004054798562226004
25
+ }
26
+ },
27
+ "versions": {
28
+ "indiceval|MMLU:hindi|5": 0
29
+ },
30
+ "config_tasks": {
31
+ "indiceval|MMLU:hindi": {
32
+ "name": "MMLU:hindi",
33
+ "prompt_function": "mmlu_helm_indic",
34
+ "hf_repo": "Cognitive-Lab/Indic-MMLU",
35
+ "hf_subset": "hi",
36
+ "metric": [
37
+ "loglikelihood_acc_single_token"
38
+ ],
39
+ "hf_avail_splits": [
40
+ "test",
41
+ "validation",
42
+ "dev"
43
+ ],
44
+ "evaluation_splits": [
45
+ "test"
46
+ ],
47
+ "few_shots_split": "dev",
48
+ "few_shots_select": "sequential",
49
+ "generation_size": 5,
50
+ "stop_sequence": [
51
+ "\n"
52
+ ],
53
+ "output_regex": null,
54
+ "frozen": false,
55
+ "suite": [
56
+ "indiceval",
57
+ "leaderboard",
58
+ "mmlu"
59
+ ],
60
+ "original_num_docs": 14042,
61
+ "effective_num_docs": 14042,
62
+ "trust_dataset": true,
63
+ "must_remove_duplicate_docs": null
64
+ }
65
+ },
66
+ "summary_tasks": {
67
+ "indiceval|MMLU:hindi|5": {
68
+ "hashes": {
69
+ "hash_examples": "86da320415d99e54",
70
+ "hash_full_prompts": "7ba2716f08d15db0",
71
+ "hash_input_tokens": "5fa5c9f93a3d76c2",
72
+ "hash_cont_tokens": "f5200c7e7cd90dda"
73
+ },
74
+ "truncated": 0,
75
+ "non_truncated": 14042,
76
+ "padded": 13793,
77
+ "non_padded": 249,
78
+ "effective_few_shots": 5.0,
79
+ "num_truncated_few_shots": 0
80
+ }
81
+ },
82
+ "summary_general": {
83
+ "hashes": {
84
+ "hash_examples": "9772337600b1daad",
85
+ "hash_full_prompts": "b11cd5ef781ad9af",
86
+ "hash_input_tokens": "22684734a1248a23",
87
+ "hash_cont_tokens": "c4caaf2e5148f67f"
88
+ },
89
+ "truncated": 0,
90
+ "non_truncated": 14042,
91
+ "padded": 13793,
92
+ "non_padded": 249,
93
+ "num_truncated_few_shots": 0
94
+ },
95
+ "email": "fhai50032@gmail.com",
96
+ "language": "hindi"
97
+ }
BiBo-Mini-Arc-Easy-Challenege.json ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": -1,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 1342.339773502,
9
+ "end_time": 6498.524528434,
10
+ "total_evaluation_time_secondes": "5156.184754932",
11
+ "model_name": "tinycompany/BiBo-Mini-v0.99",
12
+ "model_sha": "9e66d1135c27c915ecf382453909e5bc1f6a650d",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "3.31 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "indiceval|ARC-Challenge:hindi|10": {
19
+ "acc": 0.30887372013651876,
20
+ "acc_stderr": 0.013501770929344003,
21
+ "acc_norm": 0.3361774744027304,
22
+ "acc_norm_stderr": 0.013804855026205752
23
+ },
24
+ "indiceval|ARC-Easy:hindi|5": {
25
+ "acc": 0.5627104377104377,
26
+ "acc_stderr": 0.010178768429321586,
27
+ "acc_norm": 0.5349326599326599,
28
+ "acc_norm_stderr": 0.01023471305272368
29
+ },
30
+ "all": {
31
+ "acc": 0.43579207892347827,
32
+ "acc_stderr": 0.011840269679332795,
33
+ "acc_norm": 0.43555506716769515,
34
+ "acc_norm_stderr": 0.012019784039464717
35
+ }
36
+ },
37
+ "versions": {
38
+ "indiceval|ARC-Challenge:hindi|10": 0,
39
+ "indiceval|ARC-Easy:hindi|5": 0
40
+ },
41
+ "config_tasks": {
42
+ "indiceval|ARC-Challenge:hindi": {
43
+ "name": "ARC-Challenge:hindi",
44
+ "prompt_function": "arc_indic",
45
+ "hf_repo": "Cognitive-Lab/Indic-ARC-Challenge",
46
+ "hf_subset": "hi",
47
+ "metric": [
48
+ "loglikelihood_acc",
49
+ "loglikelihood_acc_norm_nospace"
50
+ ],
51
+ "hf_avail_splits": [
52
+ "train",
53
+ "validation",
54
+ "test"
55
+ ],
56
+ "evaluation_splits": [
57
+ "test"
58
+ ],
59
+ "few_shots_split": null,
60
+ "few_shots_select": "random_sampling_from_train",
61
+ "generation_size": 1,
62
+ "stop_sequence": [
63
+ "\n"
64
+ ],
65
+ "output_regex": null,
66
+ "frozen": false,
67
+ "suite": [
68
+ "indiceval",
69
+ "leaderboard",
70
+ "arc"
71
+ ],
72
+ "original_num_docs": 1172,
73
+ "effective_num_docs": 1172,
74
+ "trust_dataset": true,
75
+ "must_remove_duplicate_docs": null
76
+ },
77
+ "indiceval|ARC-Easy:hindi": {
78
+ "name": "ARC-Easy:hindi",
79
+ "prompt_function": "arc_indic",
80
+ "hf_repo": "Cognitive-Lab/Indic-ARC-Easy",
81
+ "hf_subset": "hi",
82
+ "metric": [
83
+ "loglikelihood_acc",
84
+ "loglikelihood_acc_norm_nospace"
85
+ ],
86
+ "hf_avail_splits": [
87
+ "train",
88
+ "validation",
89
+ "test"
90
+ ],
91
+ "evaluation_splits": [
92
+ "test"
93
+ ],
94
+ "few_shots_split": null,
95
+ "few_shots_select": "random_sampling_from_train",
96
+ "generation_size": 1,
97
+ "stop_sequence": [
98
+ "\n"
99
+ ],
100
+ "output_regex": null,
101
+ "frozen": false,
102
+ "suite": [
103
+ "indiceval",
104
+ "leaderboard",
105
+ "arc"
106
+ ],
107
+ "original_num_docs": 2376,
108
+ "effective_num_docs": 2376,
109
+ "trust_dataset": true,
110
+ "must_remove_duplicate_docs": null
111
+ }
112
+ },
113
+ "summary_tasks": {
114
+ "indiceval|ARC-Challenge:hindi|10": {
115
+ "hashes": {
116
+ "hash_examples": "ede3937107b50671",
117
+ "hash_full_prompts": "939fa143e0be8e76",
118
+ "hash_input_tokens": "260d85f401153bdc",
119
+ "hash_cont_tokens": "b95b113bb2e57385"
120
+ },
121
+ "truncated": 0,
122
+ "non_truncated": 1172,
123
+ "padded": 4680,
124
+ "non_padded": 7,
125
+ "effective_few_shots": 10.0,
126
+ "num_truncated_few_shots": 0
127
+ },
128
+ "indiceval|ARC-Easy:hindi|5": {
129
+ "hashes": {
130
+ "hash_examples": "0186dde6d6cf5f12",
131
+ "hash_full_prompts": "cbaaa6e70e1e350d",
132
+ "hash_input_tokens": "5496daca90725251",
133
+ "hash_cont_tokens": "f7cf5b125bc52602"
134
+ },
135
+ "truncated": 0,
136
+ "non_truncated": 2376,
137
+ "padded": 9441,
138
+ "non_padded": 60,
139
+ "effective_few_shots": 5.0,
140
+ "num_truncated_few_shots": 0
141
+ }
142
+ },
143
+ "summary_general": {
144
+ "hashes": {
145
+ "hash_examples": "799a1387b6c8a4d2",
146
+ "hash_full_prompts": "9938eadd006079ef",
147
+ "hash_input_tokens": "13eac7a7a2c17518",
148
+ "hash_cont_tokens": "14960e7bf91fd26b"
149
+ },
150
+ "truncated": 0,
151
+ "non_truncated": 3548,
152
+ "padded": 14121,
153
+ "non_padded": 67,
154
+ "num_truncated_few_shots": 0
155
+ },
156
+ "email": "fhai50032@gmail.com",
157
+ "language": "hindi"
158
+ }