ThomasTheMaker committed on
Commit
2641bc6
·
verified ·
1 Parent(s): 8fd7e18

Upload evaluation/HuggingFaceTB_SmolLM_135M_Instruct_lighteval|arc:easy|0|0_20250802_155120.json with huggingface_hub

Browse files
evaluation/HuggingFaceTB_SmolLM_135M_Instruct_lighteval|arc:easy|0|0_20250802_155120.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "model_name": "HuggingFaceTB/SmolLM-135M-Instruct",
4
+ "test_name": "lighteval|arc:easy|0|0",
5
+ "evaluation_timestamp": "2025-08-02T15:51:20.396829",
6
+ "lighteval_version": "latest",
7
+ "evaluation_type": "automated"
8
+ },
9
+ "results": {
10
+ "config_general": {
11
+ "lighteval_sha": "?",
12
+ "num_fewshot_seeds": 1,
13
+ "max_samples": 10,
14
+ "job_id": 0,
15
+ "start_time": 360616.06827779,
16
+ "end_time": 360689.209673871,
17
+ "total_evaluation_time_secondes": "73.14139608101686",
18
+ "model_name": "HuggingFaceTB/SmolLM-135M-Instruct",
19
+ "model_sha": "fcc320f490e08fdb4b99d935b2c58d40bf35b0d0",
20
+ "model_dtype": null,
21
+ "model_size": "513.13 MB",
22
+ "generation_parameters": {
23
+ "early_stopping": null,
24
+ "repetition_penalty": null,
25
+ "frequency_penalty": null,
26
+ "length_penalty": null,
27
+ "presence_penalty": null,
28
+ "max_new_tokens": null,
29
+ "min_new_tokens": null,
30
+ "seed": null,
31
+ "stop_tokens": null,
32
+ "temperature": null,
33
+ "top_k": null,
34
+ "min_p": null,
35
+ "top_p": null,
36
+ "truncate_prompt": null,
37
+ "response_format": null
38
+ }
39
+ },
40
+ "results": {
41
+ "lighteval|arc:easy|0": {
42
+ "acc": 0.5,
43
+ "acc_stderr": 0.16666666666666666,
44
+ "acc_norm": 0.6,
45
+ "acc_norm_stderr": 0.16329931618554522
46
+ },
47
+ "all": {
48
+ "acc": 0.5,
49
+ "acc_stderr": 0.16666666666666666,
50
+ "acc_norm": 0.6,
51
+ "acc_norm_stderr": 0.16329931618554522
52
+ }
53
+ },
54
+ "versions": {
55
+ "lighteval|arc:easy|0": 0
56
+ },
57
+ "config_tasks": {
58
+ "lighteval|arc:easy": {
59
+ "name": "arc:easy",
60
+ "prompt_function": "arc",
61
+ "hf_repo": "ai2_arc",
62
+ "hf_subset": "ARC-Easy",
63
+ "metric": [
64
+ {
65
+ "metric_name": "acc",
66
+ "higher_is_better": true,
67
+ "category": "8",
68
+ "use_case": "1",
69
+ "sample_level_fn": "compute",
70
+ "corpus_level_fn": "mean"
71
+ },
72
+ {
73
+ "metric_name": "acc_norm",
74
+ "higher_is_better": true,
75
+ "category": "8",
76
+ "use_case": "1",
77
+ "sample_level_fn": "compute",
78
+ "corpus_level_fn": "mean"
79
+ }
80
+ ],
81
+ "hf_revision": null,
82
+ "hf_filter": null,
83
+ "hf_avail_splits": [
84
+ "train",
85
+ "validation",
86
+ "test"
87
+ ],
88
+ "trust_dataset": true,
89
+ "evaluation_splits": [
90
+ "test"
91
+ ],
92
+ "few_shots_split": null,
93
+ "few_shots_select": "random_sampling_from_train",
94
+ "generation_size": 1,
95
+ "generation_grammar": null,
96
+ "stop_sequence": [
97
+ "\n"
98
+ ],
99
+ "num_samples": null,
100
+ "suite": [
101
+ "lighteval",
102
+ "arc"
103
+ ],
104
+ "original_num_docs": 2376,
105
+ "effective_num_docs": 10,
106
+ "must_remove_duplicate_docs": false,
107
+ "version": 0
108
+ }
109
+ },
110
+ "summary_tasks": {
111
+ "lighteval|arc:easy|0": {
112
+ "hashes": {
113
+ "hash_examples": "96119a3c29ba3e3c",
114
+ "hash_full_prompts": "96119a3c29ba3e3c",
115
+ "hash_input_tokens": "233d9be9e9923e44",
116
+ "hash_cont_tokens": "93e2ee3a693fac3a"
117
+ },
118
+ "truncated": 0,
119
+ "non_truncated": 10,
120
+ "padded": 19,
121
+ "non_padded": 21,
122
+ "effective_few_shots": 0.0,
123
+ "num_truncated_few_shots": 0
124
+ }
125
+ },
126
+ "summary_general": {
127
+ "hashes": {
128
+ "hash_examples": "9dff40a4ec5f045b",
129
+ "hash_full_prompts": "9dff40a4ec5f045b",
130
+ "hash_input_tokens": "cb3a354712244814",
131
+ "hash_cont_tokens": "5d3292e6655b4397"
132
+ },
133
+ "truncated": 0,
134
+ "non_truncated": 10,
135
+ "padded": 19,
136
+ "non_padded": 21,
137
+ "num_truncated_few_shots": 0
138
+ }
139
+ }
140
+ }