# Flat, module-level settings: one plain assignment per line so the file parses.
# The names suggest these configure text generation from a saved model;
# the code that consumes them is not visible here, so the notes on meaning
# below are assumptions to confirm against that consumer.

# --- Model source ---
# NOTE(review): presumably selects how the model is initialised — confirm
# which values other than 'resume' the consumer accepts.
init_from = 'resume'
# NOTE(review): presumably the directory a saved run is resumed from — confirm.
out_dir = 'out'

# --- Generation ---
# A single newline character.
# NOTE(review): presumably the prompt text generation starts from — confirm.
start = "\n"
num_samples = 10
max_new_tokens = 500
# NOTE(review): presumably the softmax temperature applied when sampling — confirm.
temperature = 0.8
# NOTE(review): presumably restricts sampling to the k most likely tokens — confirm.
top_k = 200

# --- Runtime ---
seed = 1337
device = 'cuda'
# Stored as a string name rather than a dtype object; the consumer must map it.
dtype = 'bfloat16'
# Shadows Python's built-in compile() within this module. The name is kept
# as-is because whatever reads these settings looks it up by this key.
compile = False