Update metadata
#4
by
iarcuschin
- opened
- benchmark_cases_metadata.csv +19 -19
- benchmark_cases_metadata.parquet +2 -2
- benchmark_metadata.json +84 -18
- benchmark_metadata_croissant.json +60 -0
benchmark_cases_metadata.csv
CHANGED
|
@@ -1,19 +1,19 @@
|
|
| 1 |
-
case_id,url,task_description,max_seq_len,min_seq_len,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
|
| 2 |
-
11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 3 |
-
13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 4 |
-
18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 5 |
-
19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 6 |
-
20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 7 |
-
21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 8 |
-
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 9 |
-
3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 10 |
-
33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 11 |
-
34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 12 |
-
35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 13 |
-
36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 14 |
-
37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 15 |
-
38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 16 |
-
4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 17 |
-
8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 18 |
-
ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
| 19 |
-
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
|
|
|
| 1 |
+
case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
|
| 2 |
+
11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 3 |
+
13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 4 |
+
18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 5 |
+
19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 6 |
+
20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 7 |
+
21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 8 |
+
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 9 |
+
3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 10 |
+
33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 11 |
+
34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 12 |
+
35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 13 |
+
36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 14 |
+
37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 15 |
+
38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 16 |
+
4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 17 |
+
8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
|
| 18 |
+
ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
| 19 |
+
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
|
benchmark_cases_metadata.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d42203edfeb52102b4df24aecf54b5a51c9c4f547b6cede024422c898564f69f
|
| 3 |
+
size 56701
|
benchmark_metadata.json
CHANGED
|
@@ -91,6 +91,7 @@
|
|
| 91 |
"trust_remote_code": false,
|
| 92 |
"rotary_adjacent_pairs": false
|
| 93 |
},
|
|
|
|
| 94 |
"training_args": {
|
| 95 |
"atol": 0.05,
|
| 96 |
"lr": 0.01,
|
|
@@ -102,7 +103,10 @@
|
|
| 102 |
"act_fn": "gelu",
|
| 103 |
"clip_grad_norm": 1.0,
|
| 104 |
"lr_scheduler": ""
|
| 105 |
-
}
|
|
|
|
|
|
|
|
|
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"case_id": "13",
|
|
@@ -185,6 +189,7 @@
|
|
| 185 |
"trust_remote_code": false,
|
| 186 |
"rotary_adjacent_pairs": false
|
| 187 |
},
|
|
|
|
| 188 |
"training_args": {
|
| 189 |
"atol": 0.05,
|
| 190 |
"lr": 0.01,
|
|
@@ -196,7 +201,10 @@
|
|
| 196 |
"act_fn": "gelu",
|
| 197 |
"clip_grad_norm": 1.0,
|
| 198 |
"lr_scheduler": ""
|
| 199 |
-
}
|
|
|
|
|
|
|
|
|
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"case_id": "18",
|
|
@@ -281,6 +289,7 @@
|
|
| 281 |
"trust_remote_code": false,
|
| 282 |
"rotary_adjacent_pairs": false
|
| 283 |
},
|
|
|
|
| 284 |
"training_args": {
|
| 285 |
"atol": 0.05,
|
| 286 |
"lr": 0.001,
|
|
@@ -292,7 +301,10 @@
|
|
| 292 |
"act_fn": "gelu",
|
| 293 |
"clip_grad_norm": 0.1,
|
| 294 |
"lr_scheduler": ""
|
| 295 |
-
}
|
|
|
|
|
|
|
|
|
|
| 296 |
},
|
| 297 |
{
|
| 298 |
"case_id": "19",
|
|
@@ -375,6 +387,7 @@
|
|
| 375 |
"trust_remote_code": false,
|
| 376 |
"rotary_adjacent_pairs": false
|
| 377 |
},
|
|
|
|
| 378 |
"training_args": {
|
| 379 |
"atol": 0.05,
|
| 380 |
"lr": 0.001,
|
|
@@ -386,7 +399,10 @@
|
|
| 386 |
"act_fn": "gelu",
|
| 387 |
"clip_grad_norm": 0.1,
|
| 388 |
"lr_scheduler": ""
|
| 389 |
-
}
|
|
|
|
|
|
|
|
|
|
| 390 |
},
|
| 391 |
{
|
| 392 |
"case_id": "20",
|
|
@@ -478,6 +494,7 @@
|
|
| 478 |
"trust_remote_code": false,
|
| 479 |
"rotary_adjacent_pairs": false
|
| 480 |
},
|
|
|
|
| 481 |
"training_args": {
|
| 482 |
"atol": 0.05,
|
| 483 |
"lr": 0.001,
|
|
@@ -489,7 +506,10 @@
|
|
| 489 |
"act_fn": "gelu",
|
| 490 |
"clip_grad_norm": 0.1,
|
| 491 |
"lr_scheduler": ""
|
| 492 |
-
}
|
|
|
|
|
|
|
|
|
|
| 493 |
},
|
| 494 |
{
|
| 495 |
"case_id": "21",
|
|
@@ -572,6 +592,7 @@
|
|
| 572 |
"trust_remote_code": false,
|
| 573 |
"rotary_adjacent_pairs": false
|
| 574 |
},
|
|
|
|
| 575 |
"training_args": {
|
| 576 |
"atol": 0.05,
|
| 577 |
"lr": 0.01,
|
|
@@ -583,7 +604,10 @@
|
|
| 583 |
"act_fn": "gelu",
|
| 584 |
"clip_grad_norm": 1.0,
|
| 585 |
"lr_scheduler": ""
|
| 586 |
-
}
|
|
|
|
|
|
|
|
|
|
| 587 |
},
|
| 588 |
{
|
| 589 |
"case_id": "24",
|
|
@@ -666,6 +690,7 @@
|
|
| 666 |
"trust_remote_code": false,
|
| 667 |
"rotary_adjacent_pairs": false
|
| 668 |
},
|
|
|
|
| 669 |
"training_args": {
|
| 670 |
"atol": 0.05,
|
| 671 |
"lr": 0.01,
|
|
@@ -677,7 +702,10 @@
|
|
| 677 |
"act_fn": "gelu",
|
| 678 |
"clip_grad_norm": 1.0,
|
| 679 |
"lr_scheduler": ""
|
| 680 |
-
}
|
|
|
|
|
|
|
|
|
|
| 681 |
},
|
| 682 |
{
|
| 683 |
"case_id": "3",
|
|
@@ -761,6 +789,7 @@
|
|
| 761 |
"trust_remote_code": false,
|
| 762 |
"rotary_adjacent_pairs": false
|
| 763 |
},
|
|
|
|
| 764 |
"training_args": {
|
| 765 |
"atol": 0.05,
|
| 766 |
"lr": 0.001,
|
|
@@ -772,7 +801,10 @@
|
|
| 772 |
"act_fn": "gelu",
|
| 773 |
"clip_grad_norm": 0.1,
|
| 774 |
"lr_scheduler": ""
|
| 775 |
-
}
|
|
|
|
|
|
|
|
|
|
| 776 |
},
|
| 777 |
{
|
| 778 |
"case_id": "33",
|
|
@@ -860,6 +892,7 @@
|
|
| 860 |
"trust_remote_code": false,
|
| 861 |
"rotary_adjacent_pairs": false
|
| 862 |
},
|
|
|
|
| 863 |
"training_args": {
|
| 864 |
"atol": 0.05,
|
| 865 |
"lr": 0.001,
|
|
@@ -871,7 +904,10 @@
|
|
| 871 |
"act_fn": "gelu",
|
| 872 |
"clip_grad_norm": 0.1,
|
| 873 |
"lr_scheduler": ""
|
| 874 |
-
}
|
|
|
|
|
|
|
|
|
|
| 875 |
},
|
| 876 |
{
|
| 877 |
"case_id": "34",
|
|
@@ -959,6 +995,7 @@
|
|
| 959 |
"trust_remote_code": false,
|
| 960 |
"rotary_adjacent_pairs": false
|
| 961 |
},
|
|
|
|
| 962 |
"training_args": {
|
| 963 |
"atol": 0.05,
|
| 964 |
"lr": 0.001,
|
|
@@ -970,7 +1007,10 @@
|
|
| 970 |
"act_fn": "gelu",
|
| 971 |
"clip_grad_norm": 0.1,
|
| 972 |
"lr_scheduler": ""
|
| 973 |
-
}
|
|
|
|
|
|
|
|
|
|
| 974 |
},
|
| 975 |
{
|
| 976 |
"case_id": "35",
|
|
@@ -1058,6 +1098,7 @@
|
|
| 1058 |
"trust_remote_code": false,
|
| 1059 |
"rotary_adjacent_pairs": false
|
| 1060 |
},
|
|
|
|
| 1061 |
"training_args": {
|
| 1062 |
"atol": 0.05,
|
| 1063 |
"lr": 0.001,
|
|
@@ -1069,7 +1110,10 @@
|
|
| 1069 |
"act_fn": "gelu",
|
| 1070 |
"clip_grad_norm": 0.1,
|
| 1071 |
"lr_scheduler": ""
|
| 1072 |
-
}
|
|
|
|
|
|
|
|
|
|
| 1073 |
},
|
| 1074 |
{
|
| 1075 |
"case_id": "36",
|
|
@@ -1152,6 +1196,7 @@
|
|
| 1152 |
"trust_remote_code": false,
|
| 1153 |
"rotary_adjacent_pairs": false
|
| 1154 |
},
|
|
|
|
| 1155 |
"training_args": {
|
| 1156 |
"atol": 0.05,
|
| 1157 |
"lr": 0.001,
|
|
@@ -1163,7 +1208,10 @@
|
|
| 1163 |
"act_fn": "gelu",
|
| 1164 |
"clip_grad_norm": 0.1,
|
| 1165 |
"lr_scheduler": ""
|
| 1166 |
-
}
|
|
|
|
|
|
|
|
|
|
| 1167 |
},
|
| 1168 |
{
|
| 1169 |
"case_id": "37",
|
|
@@ -1251,6 +1299,7 @@
|
|
| 1251 |
"trust_remote_code": false,
|
| 1252 |
"rotary_adjacent_pairs": false
|
| 1253 |
},
|
|
|
|
| 1254 |
"training_args": {
|
| 1255 |
"atol": 0.05,
|
| 1256 |
"lr": 0.001,
|
|
@@ -1262,7 +1311,10 @@
|
|
| 1262 |
"act_fn": "gelu",
|
| 1263 |
"clip_grad_norm": 0.1,
|
| 1264 |
"lr_scheduler": ""
|
| 1265 |
-
}
|
|
|
|
|
|
|
|
|
|
| 1266 |
},
|
| 1267 |
{
|
| 1268 |
"case_id": "38",
|
|
@@ -1345,6 +1397,7 @@
|
|
| 1345 |
"trust_remote_code": false,
|
| 1346 |
"rotary_adjacent_pairs": false
|
| 1347 |
},
|
|
|
|
| 1348 |
"training_args": {
|
| 1349 |
"atol": 0.05,
|
| 1350 |
"lr": 0.001,
|
|
@@ -1356,7 +1409,10 @@
|
|
| 1356 |
"act_fn": "gelu",
|
| 1357 |
"clip_grad_norm": 0.1,
|
| 1358 |
"lr_scheduler": ""
|
| 1359 |
-
}
|
|
|
|
|
|
|
|
|
|
| 1360 |
},
|
| 1361 |
{
|
| 1362 |
"case_id": "4",
|
|
@@ -1441,6 +1497,7 @@
|
|
| 1441 |
"trust_remote_code": false,
|
| 1442 |
"rotary_adjacent_pairs": false
|
| 1443 |
},
|
|
|
|
| 1444 |
"training_args": {
|
| 1445 |
"atol": 0.05,
|
| 1446 |
"lr": 0.001,
|
|
@@ -1452,7 +1509,10 @@
|
|
| 1452 |
"act_fn": "gelu",
|
| 1453 |
"clip_grad_norm": 0.1,
|
| 1454 |
"lr_scheduler": ""
|
| 1455 |
-
}
|
|
|
|
|
|
|
|
|
|
| 1456 |
},
|
| 1457 |
{
|
| 1458 |
"case_id": "8",
|
|
@@ -1540,6 +1600,7 @@
|
|
| 1540 |
"trust_remote_code": false,
|
| 1541 |
"rotary_adjacent_pairs": false
|
| 1542 |
},
|
|
|
|
| 1543 |
"training_args": {
|
| 1544 |
"atol": 0.05,
|
| 1545 |
"lr": 0.01,
|
|
@@ -1551,7 +1612,10 @@
|
|
| 1551 |
"act_fn": "gelu",
|
| 1552 |
"clip_grad_norm": 1.0,
|
| 1553 |
"lr_scheduler": ""
|
| 1554 |
-
}
|
|
|
|
|
|
|
|
|
|
| 1555 |
},
|
| 1556 |
{
|
| 1557 |
"case_id": "ioi",
|
|
@@ -1568,7 +1632,8 @@
|
|
| 1568 |
"file_name": "ll_model_100_100_40.pth",
|
| 1569 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
|
| 1570 |
}
|
| 1571 |
-
]
|
|
|
|
| 1572 |
},
|
| 1573 |
{
|
| 1574 |
"case_id": "ioi_next_token",
|
|
@@ -1589,7 +1654,8 @@
|
|
| 1589 |
"file_name": "training_args.json",
|
| 1590 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
|
| 1591 |
}
|
| 1592 |
-
]
|
|
|
|
| 1593 |
}
|
| 1594 |
]
|
| 1595 |
}
|
|
|
|
| 91 |
"trust_remote_code": false,
|
| 92 |
"rotary_adjacent_pairs": false
|
| 93 |
},
|
| 94 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl",
|
| 95 |
"training_args": {
|
| 96 |
"atol": 0.05,
|
| 97 |
"lr": 0.01,
|
|
|
|
| 103 |
"act_fn": "gelu",
|
| 104 |
"clip_grad_norm": 1.0,
|
| 105 |
"lr_scheduler": ""
|
| 106 |
+
},
|
| 107 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json",
|
| 108 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth",
|
| 109 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
|
| 110 |
},
|
| 111 |
{
|
| 112 |
"case_id": "13",
|
|
|
|
| 189 |
"trust_remote_code": false,
|
| 190 |
"rotary_adjacent_pairs": false
|
| 191 |
},
|
| 192 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl",
|
| 193 |
"training_args": {
|
| 194 |
"atol": 0.05,
|
| 195 |
"lr": 0.01,
|
|
|
|
| 201 |
"act_fn": "gelu",
|
| 202 |
"clip_grad_norm": 1.0,
|
| 203 |
"lr_scheduler": ""
|
| 204 |
+
},
|
| 205 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json",
|
| 206 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth",
|
| 207 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
|
| 208 |
},
|
| 209 |
{
|
| 210 |
"case_id": "18",
|
|
|
|
| 289 |
"trust_remote_code": false,
|
| 290 |
"rotary_adjacent_pairs": false
|
| 291 |
},
|
| 292 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl",
|
| 293 |
"training_args": {
|
| 294 |
"atol": 0.05,
|
| 295 |
"lr": 0.001,
|
|
|
|
| 301 |
"act_fn": "gelu",
|
| 302 |
"clip_grad_norm": 0.1,
|
| 303 |
"lr_scheduler": ""
|
| 304 |
+
},
|
| 305 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json",
|
| 306 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth",
|
| 307 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
|
| 308 |
},
|
| 309 |
{
|
| 310 |
"case_id": "19",
|
|
|
|
| 387 |
"trust_remote_code": false,
|
| 388 |
"rotary_adjacent_pairs": false
|
| 389 |
},
|
| 390 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl",
|
| 391 |
"training_args": {
|
| 392 |
"atol": 0.05,
|
| 393 |
"lr": 0.001,
|
|
|
|
| 399 |
"act_fn": "gelu",
|
| 400 |
"clip_grad_norm": 0.1,
|
| 401 |
"lr_scheduler": ""
|
| 402 |
+
},
|
| 403 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json",
|
| 404 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth",
|
| 405 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
|
| 406 |
},
|
| 407 |
{
|
| 408 |
"case_id": "20",
|
|
|
|
| 494 |
"trust_remote_code": false,
|
| 495 |
"rotary_adjacent_pairs": false
|
| 496 |
},
|
| 497 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl",
|
| 498 |
"training_args": {
|
| 499 |
"atol": 0.05,
|
| 500 |
"lr": 0.001,
|
|
|
|
| 506 |
"act_fn": "gelu",
|
| 507 |
"clip_grad_norm": 0.1,
|
| 508 |
"lr_scheduler": ""
|
| 509 |
+
},
|
| 510 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json",
|
| 511 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth",
|
| 512 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
|
| 513 |
},
|
| 514 |
{
|
| 515 |
"case_id": "21",
|
|
|
|
| 592 |
"trust_remote_code": false,
|
| 593 |
"rotary_adjacent_pairs": false
|
| 594 |
},
|
| 595 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl",
|
| 596 |
"training_args": {
|
| 597 |
"atol": 0.05,
|
| 598 |
"lr": 0.01,
|
|
|
|
| 604 |
"act_fn": "gelu",
|
| 605 |
"clip_grad_norm": 1.0,
|
| 606 |
"lr_scheduler": ""
|
| 607 |
+
},
|
| 608 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json",
|
| 609 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth",
|
| 610 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
|
| 611 |
},
|
| 612 |
{
|
| 613 |
"case_id": "24",
|
|
|
|
| 690 |
"trust_remote_code": false,
|
| 691 |
"rotary_adjacent_pairs": false
|
| 692 |
},
|
| 693 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl",
|
| 694 |
"training_args": {
|
| 695 |
"atol": 0.05,
|
| 696 |
"lr": 0.01,
|
|
|
|
| 702 |
"act_fn": "gelu",
|
| 703 |
"clip_grad_norm": 1.0,
|
| 704 |
"lr_scheduler": ""
|
| 705 |
+
},
|
| 706 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json",
|
| 707 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth",
|
| 708 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
|
| 709 |
},
|
| 710 |
{
|
| 711 |
"case_id": "3",
|
|
|
|
| 789 |
"trust_remote_code": false,
|
| 790 |
"rotary_adjacent_pairs": false
|
| 791 |
},
|
| 792 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl",
|
| 793 |
"training_args": {
|
| 794 |
"atol": 0.05,
|
| 795 |
"lr": 0.001,
|
|
|
|
| 801 |
"act_fn": "gelu",
|
| 802 |
"clip_grad_norm": 0.1,
|
| 803 |
"lr_scheduler": ""
|
| 804 |
+
},
|
| 805 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json",
|
| 806 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth",
|
| 807 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
|
| 808 |
},
|
| 809 |
{
|
| 810 |
"case_id": "33",
|
|
|
|
| 892 |
"trust_remote_code": false,
|
| 893 |
"rotary_adjacent_pairs": false
|
| 894 |
},
|
| 895 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl",
|
| 896 |
"training_args": {
|
| 897 |
"atol": 0.05,
|
| 898 |
"lr": 0.001,
|
|
|
|
| 904 |
"act_fn": "gelu",
|
| 905 |
"clip_grad_norm": 0.1,
|
| 906 |
"lr_scheduler": ""
|
| 907 |
+
},
|
| 908 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json",
|
| 909 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth",
|
| 910 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
|
| 911 |
},
|
| 912 |
{
|
| 913 |
"case_id": "34",
|
|
|
|
| 995 |
"trust_remote_code": false,
|
| 996 |
"rotary_adjacent_pairs": false
|
| 997 |
},
|
| 998 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl",
|
| 999 |
"training_args": {
|
| 1000 |
"atol": 0.05,
|
| 1001 |
"lr": 0.001,
|
|
|
|
| 1007 |
"act_fn": "gelu",
|
| 1008 |
"clip_grad_norm": 0.1,
|
| 1009 |
"lr_scheduler": ""
|
| 1010 |
+
},
|
| 1011 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json",
|
| 1012 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth",
|
| 1013 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
|
| 1014 |
},
|
| 1015 |
{
|
| 1016 |
"case_id": "35",
|
|
|
|
| 1098 |
"trust_remote_code": false,
|
| 1099 |
"rotary_adjacent_pairs": false
|
| 1100 |
},
|
| 1101 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl",
|
| 1102 |
"training_args": {
|
| 1103 |
"atol": 0.05,
|
| 1104 |
"lr": 0.001,
|
|
|
|
| 1110 |
"act_fn": "gelu",
|
| 1111 |
"clip_grad_norm": 0.1,
|
| 1112 |
"lr_scheduler": ""
|
| 1113 |
+
},
|
| 1114 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json",
|
| 1115 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth",
|
| 1116 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
|
| 1117 |
},
|
| 1118 |
{
|
| 1119 |
"case_id": "36",
|
|
|
|
| 1196 |
"trust_remote_code": false,
|
| 1197 |
"rotary_adjacent_pairs": false
|
| 1198 |
},
|
| 1199 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl",
|
| 1200 |
"training_args": {
|
| 1201 |
"atol": 0.05,
|
| 1202 |
"lr": 0.001,
|
|
|
|
| 1208 |
"act_fn": "gelu",
|
| 1209 |
"clip_grad_norm": 0.1,
|
| 1210 |
"lr_scheduler": ""
|
| 1211 |
+
},
|
| 1212 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json",
|
| 1213 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth",
|
| 1214 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
|
| 1215 |
},
|
| 1216 |
{
|
| 1217 |
"case_id": "37",
|
|
|
|
| 1299 |
"trust_remote_code": false,
|
| 1300 |
"rotary_adjacent_pairs": false
|
| 1301 |
},
|
| 1302 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl",
|
| 1303 |
"training_args": {
|
| 1304 |
"atol": 0.05,
|
| 1305 |
"lr": 0.001,
|
|
|
|
| 1311 |
"act_fn": "gelu",
|
| 1312 |
"clip_grad_norm": 0.1,
|
| 1313 |
"lr_scheduler": ""
|
| 1314 |
+
},
|
| 1315 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json",
|
| 1316 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth",
|
| 1317 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
|
| 1318 |
},
|
| 1319 |
{
|
| 1320 |
"case_id": "38",
|
|
|
|
| 1397 |
"trust_remote_code": false,
|
| 1398 |
"rotary_adjacent_pairs": false
|
| 1399 |
},
|
| 1400 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl",
|
| 1401 |
"training_args": {
|
| 1402 |
"atol": 0.05,
|
| 1403 |
"lr": 0.001,
|
|
|
|
| 1409 |
"act_fn": "gelu",
|
| 1410 |
"clip_grad_norm": 0.1,
|
| 1411 |
"lr_scheduler": ""
|
| 1412 |
+
},
|
| 1413 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json",
|
| 1414 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth",
|
| 1415 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
|
| 1416 |
},
|
| 1417 |
{
|
| 1418 |
"case_id": "4",
|
|
|
|
| 1497 |
"trust_remote_code": false,
|
| 1498 |
"rotary_adjacent_pairs": false
|
| 1499 |
},
|
| 1500 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl",
|
| 1501 |
"training_args": {
|
| 1502 |
"atol": 0.05,
|
| 1503 |
"lr": 0.001,
|
|
|
|
| 1509 |
"act_fn": "gelu",
|
| 1510 |
"clip_grad_norm": 0.1,
|
| 1511 |
"lr_scheduler": ""
|
| 1512 |
+
},
|
| 1513 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json",
|
| 1514 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth",
|
| 1515 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
|
| 1516 |
},
|
| 1517 |
{
|
| 1518 |
"case_id": "8",
|
|
|
|
| 1600 |
"trust_remote_code": false,
|
| 1601 |
"rotary_adjacent_pairs": false
|
| 1602 |
},
|
| 1603 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl",
|
| 1604 |
"training_args": {
|
| 1605 |
"atol": 0.05,
|
| 1606 |
"lr": 0.01,
|
|
|
|
| 1612 |
"act_fn": "gelu",
|
| 1613 |
"clip_grad_norm": 1.0,
|
| 1614 |
"lr_scheduler": ""
|
| 1615 |
+
},
|
| 1616 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json",
|
| 1617 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth",
|
| 1618 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
|
| 1619 |
},
|
| 1620 |
{
|
| 1621 |
"case_id": "ioi",
|
|
|
|
| 1632 |
"file_name": "ll_model_100_100_40.pth",
|
| 1633 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
|
| 1634 |
}
|
| 1635 |
+
],
|
| 1636 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
|
| 1637 |
},
|
| 1638 |
{
|
| 1639 |
"case_id": "ioi_next_token",
|
|
|
|
| 1654 |
"file_name": "training_args.json",
|
| 1655 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
|
| 1656 |
}
|
| 1657 |
+
],
|
| 1658 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
|
| 1659 |
}
|
| 1660 |
]
|
| 1661 |
}
|
benchmark_metadata_croissant.json
CHANGED
|
@@ -197,6 +197,66 @@
|
|
| 197 |
}
|
| 198 |
}
|
| 199 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
{
|
| 201 |
"@type": "cr:Field",
|
| 202 |
"@id": "training_args.atol",
|
|
|
|
| 197 |
}
|
| 198 |
}
|
| 199 |
},
|
| 200 |
+
{
|
| 201 |
+
"@type": "cr:Field",
|
| 202 |
+
"@id": "transformer_cfg_file_url",
|
| 203 |
+
"name": "transformer_cfg_file_url",
|
| 204 |
+
"description": "Column 'transformer_cfg_file_url' from the parquet file describing all the cases in the benchmark.",
|
| 205 |
+
"dataType": "sc:Text",
|
| 206 |
+
"source": {
|
| 207 |
+
"fileSet": {
|
| 208 |
+
"@id": "benchmark-cases-parquet"
|
| 209 |
+
},
|
| 210 |
+
"extract": {
|
| 211 |
+
"column": "transformer_cfg_file_url"
|
| 212 |
+
}
|
| 213 |
+
}
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"@type": "cr:Field",
|
| 217 |
+
"@id": "training_args_file_url",
|
| 218 |
+
"name": "training_args_file_url",
|
| 219 |
+
"description": "Column 'training_args_file_url' from the parquet file describing all the cases in the benchmark.",
|
| 220 |
+
"dataType": "sc:Text",
|
| 221 |
+
"source": {
|
| 222 |
+
"fileSet": {
|
| 223 |
+
"@id": "benchmark-cases-parquet"
|
| 224 |
+
},
|
| 225 |
+
"extract": {
|
| 226 |
+
"column": "training_args_file_url"
|
| 227 |
+
}
|
| 228 |
+
}
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"@type": "cr:Field",
|
| 232 |
+
"@id": "weights_file_url",
|
| 233 |
+
"name": "weights_file_url",
|
| 234 |
+
"description": "Column 'weights_file_url' from the parquet file describing all the cases in the benchmark.",
|
| 235 |
+
"dataType": "sc:Text",
|
| 236 |
+
"source": {
|
| 237 |
+
"fileSet": {
|
| 238 |
+
"@id": "benchmark-cases-parquet"
|
| 239 |
+
},
|
| 240 |
+
"extract": {
|
| 241 |
+
"column": "weights_file_url"
|
| 242 |
+
}
|
| 243 |
+
}
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"@type": "cr:Field",
|
| 247 |
+
"@id": "circuit_file_url",
|
| 248 |
+
"name": "circuit_file_url",
|
| 249 |
+
"description": "Column 'circuit_file_url' from the parquet file describing all the cases in the benchmark.",
|
| 250 |
+
"dataType": "sc:Text",
|
| 251 |
+
"source": {
|
| 252 |
+
"fileSet": {
|
| 253 |
+
"@id": "benchmark-cases-parquet"
|
| 254 |
+
},
|
| 255 |
+
"extract": {
|
| 256 |
+
"column": "circuit_file_url"
|
| 257 |
+
}
|
| 258 |
+
}
|
| 259 |
+
},
|
| 260 |
{
|
| 261 |
"@type": "cr:Field",
|
| 262 |
"@id": "training_args.atol",
|