Upload folder using huggingface_hub
Browse files- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/README.md +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/adapter_config.json +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/adapter_model.bin +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/hidden_states_projector.pt +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/projector.pt +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/special_tokens_map.json +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/tokenizer.json +0 -0
- tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/tokenizer_config.json +0 -0
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/README.md
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/adapter_config.json
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/adapter_model.bin
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/hidden_states_projector.pt
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/projector.pt
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/special_tokens_map.json
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/tokenizer.json
RENAMED
|
File without changes
|
tinyllama/tinyllama-1.1b-3T/wctkd/criterion=wctkd__forward_kl-lora-rank=256-alpha=8-dropout=0.1-bf16__teacher=mistral__kd^rate=0.5__kd^temp=2.0__wctkd^alpha=0.5__wctkd^beta=0.2__wctkd^gamma=0.3__wctkd^hidden_gamma=0.5__wctkd^top_k=8__epoch=10__bsz=4x2x1=8__lr=0.001/{epoch7_step10003_loss2.3743_rougel29.8116 → epoch7_step10003_loss2.3743_rougel29.8115}/tokenizer_config.json
RENAMED
|
File without changes
|