| *.7z filter=lfs diff=lfs merge=lfs -text | |
| *.arrow filter=lfs diff=lfs merge=lfs -text | |
| *.bin filter=lfs diff=lfs merge=lfs -text | |
| *.bz2 filter=lfs diff=lfs merge=lfs -text | |
| *.ckpt filter=lfs diff=lfs merge=lfs -text | |
| *.ftz filter=lfs diff=lfs merge=lfs -text | |
| *.gz filter=lfs diff=lfs merge=lfs -text | |
| *.h5 filter=lfs diff=lfs merge=lfs -text | |
| *.joblib filter=lfs diff=lfs merge=lfs -text | |
| *.lfs.* filter=lfs diff=lfs merge=lfs -text | |
| *.mlmodel filter=lfs diff=lfs merge=lfs -text | |
| *.model filter=lfs diff=lfs merge=lfs -text | |
| *.msgpack filter=lfs diff=lfs merge=lfs -text | |
| *.npy filter=lfs diff=lfs merge=lfs -text | |
| *.npz filter=lfs diff=lfs merge=lfs -text | |
| *.onnx filter=lfs diff=lfs merge=lfs -text | |
| *.ot filter=lfs diff=lfs merge=lfs -text | |
| *.parquet filter=lfs diff=lfs merge=lfs -text | |
| *.pb filter=lfs diff=lfs merge=lfs -text | |
| *.pickle filter=lfs diff=lfs merge=lfs -text | |
| *.pkl filter=lfs diff=lfs merge=lfs -text | |
| *.pt filter=lfs diff=lfs merge=lfs -text | |
| *.pth filter=lfs diff=lfs merge=lfs -text | |
| *.rar filter=lfs diff=lfs merge=lfs -text | |
| *.safetensors filter=lfs diff=lfs merge=lfs -text | |
| saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| *.tar.* filter=lfs diff=lfs merge=lfs -text | |
| *.tar filter=lfs diff=lfs merge=lfs -text | |
| *.tflite filter=lfs diff=lfs merge=lfs -text | |
| *.tgz filter=lfs diff=lfs merge=lfs -text | |
| *.wasm filter=lfs diff=lfs merge=lfs -text | |
| *.xz filter=lfs diff=lfs merge=lfs -text | |
| *.zip filter=lfs diff=lfs merge=lfs -text | |
| *.zst filter=lfs diff=lfs merge=lfs -text | |
| *tfevents* filter=lfs diff=lfs merge=lfs -text | |
| bart_baseline/bart_baseline_results.json filter=lfs diff=lfs merge=lfs -text | |
| llama_xsum_finetuned/final_model/test_predictions.json filter=lfs diff=lfs merge=lfs -text | |
| llama_xsum_finetuned/final_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text | |
| llama_xsum_finetuned/test_predictions.json filter=lfs diff=lfs merge=lfs -text | |
| t5_base_xsum/test_predictions.json filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_balance_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_0_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_0_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_1_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_1_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_2_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_2_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_3_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_decoder_layer_3_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_0_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_0_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_1_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_1_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_2_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_2_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_3_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_heatmap_encoder_layer_3_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_usage_over_epochs_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/expert_usage_over_epochs_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/routing_comparison.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/test_expert_usage_hash.png filter=lfs diff=lfs merge=lfs -text | |
| vizualisations/test_expert_usage_token_choice.png filter=lfs diff=lfs merge=lfs -text | |
| results/gqa_comparison/gqa_vs_mha_comparison.png filter=lfs diff=lfs merge=lfs -text | |
| results/lb_comparison/expert_balance_comparison.png filter=lfs diff=lfs merge=lfs -text | |
| results/lb_comparison/loss_comparison.png filter=lfs diff=lfs merge=lfs -text | |
| results/moe_hash/moe_hash_test_predictions.json filter=lfs diff=lfs merge=lfs -text | |
| results/moe_hash/training_curves.png filter=lfs diff=lfs merge=lfs -text | |
| results/moe_token_choice/moe_token_choice_test_predictions.json filter=lfs diff=lfs merge=lfs -text | |
| results/moe_token_choice/training_curves.png filter=lfs diff=lfs merge=lfs -text | |