; Optimizer and training hyperparameters.
; One "key = value" pair per line; comments are full-line only.

; Optimizer selection and its numeric settings.
[optimizer]
; Name of the optimizer to use.
optimizer = sgd
learning_rate = 0.01
momentum = 0.9
; Comma-separated pair of values.
; NOTE(review): betas and epsilon look like Adam-style settings; whether
; they are read when optimizer = sgd depends on the consumer - confirm.
betas = 0.9, 0.999
epsilon = 1e-8
weight_decay = 0

; Loss function, batch sizes and run length.
[training]
loss = cross_entropy
; Batch sizes are set separately for the train and test splits.
batch_size_train = 64
batch_size_test = 64
epochs = 50