from fairseq.data import Dictionary


def init_teacher_model():
    # `args`, `Logger`, and `load_pretrained_ernierna` are defined elsewhere in this project.
    # Build the teacher tokenizer from the fairseq dictionary shipped with the pretraining data.
    vocab_path = args.arg_overrides['data'] + '/dict.txt'
    tokenizer = Dictionary.load(vocab_path)
    tokenizer.add_symbol('<mask>')

    # Load the pretrained ERNIE-RNA checkpoint and keep only its encoder as the teacher.
    model_pre = load_pretrained_ernierna(args.mlm_pretrained_model_path, args.arg_overrides)
    model = model_pre.encoder

    if args.debug:
        # Debug mode: truncate the encoder to a single transformer layer so runs stay cheap.
        print('debug mode')
        num_layers_to_keep = 1
        model.sentence_encoder.layers = model.sentence_encoder.layers[:num_layers_to_keep]

    Logger(f'Teacher model (LLM) total parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.3f}M, vocab_size={len(tokenizer)}')

    model = model.to(args.device)
    print(model)
    return model, tokenizer
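

# Usage sketch (not from the original file): a minimal, hypothetical `args`
# namespace showing the fields init_teacher_model() reads. The paths below are
# placeholders, not real data or checkpoints.
#
#     from argparse import Namespace
#     import torch
#
#     args = Namespace(
#         arg_overrides={'data': '/path/to/pretrain_data'},    # directory containing dict.txt
#         mlm_pretrained_model_path='/path/to/checkpoint.pt',  # pretrained ERNIE-RNA weights
#         debug=False,                                         # True -> keep only 1 encoder layer
#         device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
#     )
#     teacher_model, teacher_tokenizer = init_teacher_model()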