Update metadata

by iarcuschin - opened Jun 12, 2024

base: refs/heads/main

←

from: refs/pr/5

Discussion Files changed

+645

-470

Files changed (4) hide show

benchmark_cases_metadata.csv +19 -19
benchmark_cases_metadata.parquet +2 -2
benchmark_metadata.json +474 -314
benchmark_metadata_croissant.json +150 -135

benchmark_cases_metadata.csv CHANGED Viewed

@@ -1,19 +1,19 @@
-case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
-11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
-13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
-18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
-19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
-20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
-21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
-24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
-3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
-33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
-34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
-35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
-36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
-37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
-38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
-4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
-8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
-ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
-ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True

+case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,training_args.model_pair,training_args.next_token,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.batch_size,training_args.num_workers,training_args.early_stop,training_args.scheduler_val_metric,training_args.scheduler_mode
+11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.0005,False,1.0,1.0,0.5,2000.0,gelu,0.1,,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
+4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,20,10,5,custom,4,80,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.13333333333333333,True,False,standard,False,8,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
+ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,,True,0.65,False,256.0,0.0,True,"val/accuracy,val/IIA",max
+ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,,True,0.65,False,256.0,0.0,True,"val/accuracy,val/IIA",max

benchmark_cases_metadata.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d42203edfeb52102b4df24aecf54b5a51c9c4f547b6cede024422c898564f69f
-size 56701

 version https://git-lfs.github.com/spec/v1
+oid sha256:568194933b940c4c03457b1c64a8cb074943dfd075ff83f06e84a6376e3a8dcf
+size 58286

benchmark_metadata.json CHANGED Viewed

@@ -27,16 +27,16 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json"
         }
       ],
       "transformer_cfg": {
@@ -66,7 +66,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -91,7 +90,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
@@ -104,8 +103,8 @@
         "clip_grad_norm": 1.0,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
     },
     {
@@ -125,16 +124,16 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json"
         }
       ],
       "transformer_cfg": {
@@ -164,7 +163,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "bidirectional",
         "attn_only": false,
@@ -189,7 +187,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
@@ -202,8 +200,8 @@
         "clip_grad_norm": 1.0,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
     },
     {
@@ -225,26 +223,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 12,
         "n_ctx": 10,
-        "d_head": 3,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 48,
         "act_fn": "gelu",
         "d_vocab": 7,
         "eps": 1e-05,
@@ -264,7 +262,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "bidirectional",
         "attn_only": false,
@@ -277,7 +274,7 @@
         "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -289,21 +286,22 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
     },
     {
@@ -323,16 +321,16 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json"
         }
       ],
       "transformer_cfg": {
@@ -362,7 +360,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -387,7 +384,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
@@ -400,8 +397,8 @@
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
     },
     {
@@ -430,26 +427,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
         },
         {
-          "file_name": "ll_model_1110.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth"
         },
         {
-          "file_name": "ll_model_cfg_1110.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl"
         },
         {
-          "file_name": "meta_1110.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 14,
         "eps": 1e-05,
@@ -469,7 +466,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cuda",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -482,7 +478,7 @@
         "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -494,21 +490,22 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
     },
     {
@@ -528,26 +525,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 20,
         "n_ctx": 10,
-        "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
@@ -567,12 +564,11 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1885618083164127,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -580,7 +576,7 @@
         "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -592,27 +588,28 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
-        "epochs": 500,
         "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
     },
     {
-      "case_id": "24",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24",
-      "task_description": "Identifies the first occurrence of each token in a sequence.",
       "vocab": [
         "a",
         "b",
@@ -623,29 +620,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 20,
         "n_ctx": 10,
         "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
@@ -665,20 +662,19 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1885618083164127,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -690,7 +686,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
@@ -698,14 +694,118 @@
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
         "strict_weight": 0.4,
-        "epochs": 500,
         "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
     },
     {
       "case_id": "3",
@@ -725,16 +825,16 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
         },
         {
-          "file_name": "ll_model_10110.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth"
         },
         {
-          "file_name": "ll_model_cfg_10110.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl"
         },
         {
-          "file_name": "meta_10110.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json"
         }
       ],
       "transformer_cfg": {
@@ -764,7 +864,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -789,7 +888,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
@@ -802,8 +901,8 @@
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
     },
     {
@@ -828,16 +927,16 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json"
         }
       ],
       "transformer_cfg": {
@@ -867,7 +966,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -892,7 +990,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
@@ -905,8 +1003,8 @@
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
     },
     {
@@ -931,26 +1029,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
@@ -970,7 +1068,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -983,7 +1080,7 @@
         "d_vocab_out": 5,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -995,21 +1092,22 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
     },
     {
@@ -1034,26 +1132,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
@@ -1073,7 +1171,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -1086,7 +1183,7 @@
         "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1098,21 +1195,22 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
     },
     {
@@ -1132,26 +1230,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
         },
         {
-          "file_name": "ll_model_10110.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth"
         },
         {
-          "file_name": "ll_model_cfg_10110.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl"
         },
         {
-          "file_name": "meta_10110.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
         "n_ctx": 10,
         "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
@@ -1171,7 +1269,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cuda",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -1184,7 +1281,7 @@
         "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1196,21 +1293,22 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 10.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
     },
     {
@@ -1235,26 +1333,26 @@
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
@@ -1274,7 +1372,6 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
@@ -1287,7 +1384,7 @@
         "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1299,28 +1396,31 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
     },
     {
-      "case_id": "38",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38",
-      "task_description": "Checks if tokens alternate between two types.",
       "vocab": [
         "a",
         "b",
         "c"
@@ -1330,19 +1430,19 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json"
         }
       ],
       "transformer_cfg": {
@@ -1354,7 +1454,7 @@
         "n_heads": 4,
         "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1372,17 +1472,16 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 9600,
@@ -1397,7 +1496,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
@@ -1410,39 +1509,42 @@
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
     },
     {
-      "case_id": "4",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
-      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
       "vocab": [
-        "(",
-        ")",
-        "a",
         "b",
-        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json"
         }
       ],
       "transformer_cfg": {
@@ -1454,7 +1556,7 @@
         "n_heads": 4,
         "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 7,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1472,17 +1574,16 @@
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.17056057308448835,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 1,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 9600,
@@ -1497,143 +1598,123 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
         "strict_weight": 0.4,
-        "epochs": 2000,
         "act_fn": "gelu",
-        "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
     },
     {
-      "case_id": "8",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8",
-      "task_description": "Fills gaps between tokens with a specified filler.",
-      "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
-      ],
-      "max_seq_len": 10,
-      "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
         },
         {
-          "file_name": "ll_model_510.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth"
         },
         {
-          "file_name": "ll_model_cfg_510.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl"
         },
         {
-          "file_name": "meta_510.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 20,
-        "n_ctx": 10,
-        "d_head": 5,
-        "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
-        "act_fn": "gelu",
-        "d_vocab": 10,
         "eps": 1e-05,
-        "use_attn_result": true,
         "use_attn_scale": true,
-        "use_split_qkv_input": true,
-        "use_hook_mlp_in": true,
         "use_attn_in": false,
         "use_local_attn": false,
-        "original_architecture": null,
         "from_checkpoint": false,
         "checkpoint_index": null,
         "checkpoint_label_type": null,
         "checkpoint_value": null,
-        "tokenizer_name": null,
         "window_size": null,
         "attn_types": null,
         "init_mode": "gpt2",
-        "normalization_type": null,
-        "device": "cpu",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
-        "seed": 0,
-        "initializer_range": 0.13333333333333333,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "dtype": "torch.float32",
-        "tokenizer_prepends_bos": null,
         "n_key_value_heads": null,
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
-        "epochs": 500,
-        "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
-    },
-    {
-      "case_id": "ioi",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
-      "task_description": "Indirect object identification",
-      "max_seq_len": 16,
-      "min_seq_len": 16,
-      "files": [
-        {
-          "file_name": "corr_100_100_40.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/corr_100_100_40.json"
-        },
-        {
-          "file_name": "ll_model_100_100_40.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
-        }
-      ],
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
     },
     {
       "case_id": "ioi_next_token",
@@ -1643,19 +1724,98 @@
       "min_seq_len": 16,
       "files": [
         {
-          "file_name": "corr_100_100_40.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/corr_100_100_40.json"
         },
         {
-          "file_name": "ll_model_100_100_40.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
         },
         {
-          "file_name": "training_args.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
         }
       ],
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
     }
   ]
 }

           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json"
         }
       ],
       "transformer_cfg": {
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "clip_grad_norm": 1.0,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json"
         }
       ],
       "transformer_cfg": {
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "bidirectional",
         "attn_only": false,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "clip_grad_norm": 1.0,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 26,
         "n_ctx": 10,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 104,
         "act_fn": "gelu",
         "d_vocab": 7,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "bidirectional",
         "attn_only": false,
         "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 15808,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json"
         }
       ],
       "transformer_cfg": {
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 13,
         "n_ctx": 10,
+        "d_head": 3,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 52,
         "act_fn": "gelu",
         "d_vocab": 14,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 3952,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 0.4,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json"
         }
       ],
       "transformer_cfg": {
+        "n_layers": 4,
+        "d_model": 50,
         "n_ctx": 10,
+        "d_head": 12,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 200,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
+        "initializer_range": 0.09847319278346618,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 118400,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.0005,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 0.5,
+        "epochs": 2000,
         "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
     },
     {
+      "case_id": "26",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26",
+      "task_description": "Creates a cascading effect by repeating each token in sequence incrementally.",
       "vocab": [
         "a",
         "b",
       "files": [
         {
           "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 21,
         "n_ctx": 10,
         "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 84,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
+        "initializer_range": 0.12344267996967354,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
+        "d_vocab_out": 27,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 10416,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
         "strict_weight": 0.4,
+        "epochs": 2000,
         "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+    },
+    {
+      "case_id": "29",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29",
+      "task_description": "Creates abbreviations for each token in the sequence.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 13,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 52,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1539600717839002,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 8,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3952,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
     },
     {
       "case_id": "3",
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json"
         }
       ],
       "transformer_cfg": {
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json"
         }
       ],
       "transformer_cfg": {
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 16,
         "n_ctx": 10,
+        "d_head": 4,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 64,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "d_vocab_out": 5,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 6144,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 9,
         "n_ctx": 10,
+        "d_head": 2,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 36,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 1872,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 6,
         "n_ctx": 10,
         "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 24,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 768,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
     },
     {
           "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 12,
         "n_ctx": 10,
+        "d_head": 3,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 48,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 3456,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
     },
     {
+      "case_id": "4",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
+      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
       "vocab": [
+        "(",
+        ")",
         "a",
         "b",
         "c"
       "files": [
         {
           "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_heads": 4,
         "d_mlp": 80,
         "act_fn": "gelu",
+        "d_vocab": 7,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
+        "initializer_range": 0.17056057308448835,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
+        "d_vocab_out": 1,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 9600,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
     },
     {
+      "case_id": "8",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8",
+      "task_description": "Fills gaps between tokens with a specified filler.",
       "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
         "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_heads": 4,
         "d_mlp": 80,
         "act_fn": "gelu",
+        "d_vocab": 10,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
         "attn_types": null,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
+        "initializer_range": 0.13333333333333333,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
+        "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 9600,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
         "strict_weight": 0.4,
+        "epochs": 500,
         "act_fn": "gelu",
+        "clip_grad_norm": 1.0,
         "lr_scheduler": ""
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
     },
     {
+      "case_id": "ioi",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
+      "task_description": "Indirect object identification",
+      "max_seq_len": 16,
+      "min_seq_len": 16,
       "files": [
         {
           "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
         },
         {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json"
         }
       ],
       "transformer_cfg": {
+        "n_layers": 6,
+        "d_model": 64,
+        "n_ctx": 1024,
+        "d_head": 16,
+        "model_name": "gpt2",
         "n_heads": 4,
+        "d_mlp": 3072,
+        "act_fn": "gelu_new",
+        "d_vocab": 50257,
         "eps": 1e-05,
+        "use_attn_result": false,
         "use_attn_scale": true,
+        "use_split_qkv_input": false,
+        "use_hook_mlp_in": false,
         "use_attn_in": false,
         "use_local_attn": false,
+        "original_architecture": "GPT2LMHeadModel",
         "from_checkpoint": false,
         "checkpoint_index": null,
         "checkpoint_label_type": null,
         "checkpoint_value": null,
+        "tokenizer_name": "gpt2",
         "window_size": null,
         "attn_types": null,
         "init_mode": "gpt2",
+        "normalization_type": "LNPre",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
+        "seed": null,
+        "initializer_range": 0.02886751345948129,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
+        "d_vocab_out": 50257,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 2457600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "dtype": "torch.float32",
+        "tokenizer_prepends_bos": false,
         "n_key_value_heads": null,
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl",
       "training_args": {
+        "next_token": true,
+        "non_ioi_thresh": 0.65,
+        "use_per_token_check": false,
+        "batch_size": 256,
+        "lr": 0.001,
+        "num_workers": 0,
+        "early_stop": true,
+        "lr_scheduler": null,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA"
+        ],
+        "scheduler_mode": "max",
+        "clip_grad_norm": 1.0,
         "atol": 0.05,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 0.4
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
     },
     {
       "case_id": "ioi_next_token",
       "min_seq_len": 16,
       "files": [
         {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth"
         },
         {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl"
         },
         {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json"
         }
       ],
+      "transformer_cfg": {
+        "n_layers": 6,
+        "d_model": 64,
+        "n_ctx": 1024,
+        "d_head": 16,
+        "model_name": "gpt2",
+        "n_heads": 4,
+        "d_mlp": 3072,
+        "act_fn": "gelu_new",
+        "d_vocab": 50257,
+        "eps": 1e-05,
+        "use_attn_result": false,
+        "use_attn_scale": true,
+        "use_split_qkv_input": false,
+        "use_hook_mlp_in": false,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": "GPT2LMHeadModel",
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": "gpt2",
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": "LNPre",
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": null,
+        "initializer_range": 0.02886751345948129,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 50257,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 2457600,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": false,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl",
+      "training_args": {
+        "next_token": true,
+        "non_ioi_thresh": 0.65,
+        "use_per_token_check": false,
+        "batch_size": 256,
+        "lr": 0.001,
+        "num_workers": 0,
+        "early_stop": true,
+        "lr_scheduler": null,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA"
+        ],
+        "scheduler_mode": "max",
+        "clip_grad_norm": 1.0,
+        "atol": 0.05,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
     }
   ]
 }

benchmark_metadata_croissant.json CHANGED Viewed

@@ -412,7 +412,7 @@
           "@id": "transformer_cfg.n_layers",
           "name": "transformer_cfg.n_layers",
           "description": "Column 'transformer_cfg.n_layers' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -427,7 +427,7 @@
           "@id": "transformer_cfg.d_model",
           "name": "transformer_cfg.d_model",
           "description": "Column 'transformer_cfg.d_model' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -442,7 +442,7 @@
           "@id": "transformer_cfg.n_ctx",
           "name": "transformer_cfg.n_ctx",
           "description": "Column 'transformer_cfg.n_ctx' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -457,7 +457,7 @@
           "@id": "transformer_cfg.d_head",
           "name": "transformer_cfg.d_head",
           "description": "Column 'transformer_cfg.d_head' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -487,7 +487,7 @@
           "@id": "transformer_cfg.n_heads",
           "name": "transformer_cfg.n_heads",
           "description": "Column 'transformer_cfg.n_heads' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -502,7 +502,7 @@
           "@id": "transformer_cfg.d_mlp",
           "name": "transformer_cfg.d_mlp",
           "description": "Column 'transformer_cfg.d_mlp' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -532,7 +532,7 @@
           "@id": "transformer_cfg.d_vocab",
           "name": "transformer_cfg.d_vocab",
           "description": "Column 'transformer_cfg.d_vocab' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -652,7 +652,7 @@
           "@id": "transformer_cfg.original_architecture",
           "name": "transformer_cfg.original_architecture",
           "description": "Column 'transformer_cfg.original_architecture' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -677,57 +677,12 @@
             }
           }
         },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.checkpoint_index",
-          "name": "transformer_cfg.checkpoint_index",
-          "description": "Column 'transformer_cfg.checkpoint_index' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.checkpoint_index"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.checkpoint_label_type",
-          "name": "transformer_cfg.checkpoint_label_type",
-          "description": "Column 'transformer_cfg.checkpoint_label_type' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.checkpoint_label_type"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.checkpoint_value",
-          "name": "transformer_cfg.checkpoint_value",
-          "description": "Column 'transformer_cfg.checkpoint_value' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.checkpoint_value"
-            }
-          }
-        },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.tokenizer_name",
           "name": "transformer_cfg.tokenizer_name",
           "description": "Column 'transformer_cfg.tokenizer_name' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -737,36 +692,6 @@
             }
           }
         },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.window_size",
-          "name": "transformer_cfg.window_size",
-          "description": "Column 'transformer_cfg.window_size' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.window_size"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.attn_types",
-          "name": "transformer_cfg.attn_types",
-          "description": "Column 'transformer_cfg.attn_types' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.attn_types"
-            }
-          }
-        },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.init_mode",
@@ -787,28 +712,13 @@
           "@id": "transformer_cfg.normalization_type",
           "name": "transformer_cfg.normalization_type",
           "description": "Column 'transformer_cfg.normalization_type' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.normalization_type"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.device",
-          "name": "transformer_cfg.device",
-          "description": "Column 'transformer_cfg.device' from the parquet file describing all the cases in the benchmark.",
           "dataType": "sc:Text",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
-              "column": "transformer_cfg.device"
             }
           }
         },
@@ -817,7 +727,7 @@
           "@id": "transformer_cfg.n_devices",
           "name": "transformer_cfg.n_devices",
           "description": "Column 'transformer_cfg.n_devices' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -952,7 +862,7 @@
           "@id": "transformer_cfg.d_vocab_out",
           "name": "transformer_cfg.d_vocab_out",
           "description": "Column 'transformer_cfg.d_vocab_out' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -977,27 +887,12 @@
             }
           }
         },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.rotary_dim",
-          "name": "transformer_cfg.rotary_dim",
-          "description": "Column 'transformer_cfg.rotary_dim' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.rotary_dim"
-            }
-          }
-        },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.n_params",
           "name": "transformer_cfg.n_params",
           "description": "Column 'transformer_cfg.n_params' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -1072,7 +967,7 @@
           "@id": "transformer_cfg.tokenizer_prepends_bos",
           "name": "transformer_cfg.tokenizer_prepends_bos",
           "description": "Column 'transformer_cfg.tokenizer_prepends_bos' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -1082,21 +977,6 @@
             }
           }
         },
-        {
-          "@type": "cr:Field",
-          "@id": "transformer_cfg.n_key_value_heads",
-          "name": "transformer_cfg.n_key_value_heads",
-          "description": "Column 'transformer_cfg.n_key_value_heads' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "transformer_cfg.n_key_value_heads"
-            }
-          }
-        },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.post_embedding_ln",
@@ -1117,7 +997,7 @@
           "@id": "transformer_cfg.rotary_base",
           "name": "transformer_cfg.rotary_base",
           "description": "Column 'transformer_cfg.rotary_base' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
@@ -1156,6 +1036,141 @@
               "column": "transformer_cfg.rotary_adjacent_pairs"
             }
           }
         }
       ]
     }

           "@id": "transformer_cfg.n_layers",
           "name": "transformer_cfg.n_layers",
           "description": "Column 'transformer_cfg.n_layers' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.d_model",
           "name": "transformer_cfg.d_model",
           "description": "Column 'transformer_cfg.d_model' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.n_ctx",
           "name": "transformer_cfg.n_ctx",
           "description": "Column 'transformer_cfg.n_ctx' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.d_head",
           "name": "transformer_cfg.d_head",
           "description": "Column 'transformer_cfg.d_head' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.n_heads",
           "name": "transformer_cfg.n_heads",
           "description": "Column 'transformer_cfg.n_heads' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.d_mlp",
           "name": "transformer_cfg.d_mlp",
           "description": "Column 'transformer_cfg.d_mlp' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.d_vocab",
           "name": "transformer_cfg.d_vocab",
           "description": "Column 'transformer_cfg.d_vocab' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.original_architecture",
           "name": "transformer_cfg.original_architecture",
           "description": "Column 'transformer_cfg.original_architecture' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.tokenizer_name",
           "name": "transformer_cfg.tokenizer_name",
           "description": "Column 'transformer_cfg.tokenizer_name' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.init_mode",
           "@id": "transformer_cfg.normalization_type",
           "name": "transformer_cfg.normalization_type",
           "description": "Column 'transformer_cfg.normalization_type' from the parquet file describing all the cases in the benchmark.",
           "dataType": "sc:Text",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
+              "column": "transformer_cfg.normalization_type"
             }
           }
         },
           "@id": "transformer_cfg.n_devices",
           "name": "transformer_cfg.n_devices",
           "description": "Column 'transformer_cfg.n_devices' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.d_vocab_out",
           "name": "transformer_cfg.d_vocab_out",
           "description": "Column 'transformer_cfg.d_vocab_out' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.n_params",
           "name": "transformer_cfg.n_params",
           "description": "Column 'transformer_cfg.n_params' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
           "@id": "transformer_cfg.tokenizer_prepends_bos",
           "name": "transformer_cfg.tokenizer_prepends_bos",
           "description": "Column 'transformer_cfg.tokenizer_prepends_bos' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.post_embedding_ln",
           "@id": "transformer_cfg.rotary_base",
           "name": "transformer_cfg.rotary_base",
           "description": "Column 'transformer_cfg.rotary_base' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Integer",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
               "column": "transformer_cfg.rotary_adjacent_pairs"
             }
           }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.model_pair",
+          "name": "training_args.model_pair",
+          "description": "Column 'training_args.model_pair' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.model_pair"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.next_token",
+          "name": "training_args.next_token",
+          "description": "Column 'training_args.next_token' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.next_token"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.non_ioi_thresh",
+          "name": "training_args.non_ioi_thresh",
+          "description": "Column 'training_args.non_ioi_thresh' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.non_ioi_thresh"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.use_per_token_check",
+          "name": "training_args.use_per_token_check",
+          "description": "Column 'training_args.use_per_token_check' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.use_per_token_check"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.batch_size",
+          "name": "training_args.batch_size",
+          "description": "Column 'training_args.batch_size' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.batch_size"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.num_workers",
+          "name": "training_args.num_workers",
+          "description": "Column 'training_args.num_workers' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.num_workers"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.early_stop",
+          "name": "training_args.early_stop",
+          "description": "Column 'training_args.early_stop' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.early_stop"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.scheduler_val_metric",
+          "name": "training_args.scheduler_val_metric",
+          "description": "Column 'training_args.scheduler_val_metric' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.scheduler_val_metric"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.scheduler_mode",
+          "name": "training_args.scheduler_mode",
+          "description": "Column 'training_args.scheduler_mode' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.scheduler_mode"
+            }
+          }
         }
       ]
     }