Shavrina
/

RusEnQA

Text Generation

text2text-generation

Model card Files Files and versions

RusEnQA / deepspeed_config.json

Shavrina's picture

Upload deepspeed_config.json

ee14091 about 4 years ago

history blame contribute delete

653 Bytes

	{
		"train_micro_batch_size_per_gpu": 2,
		"gradient_accumulation_steps": 1,
		"steps_per_print": 100,
		"gradient_clipping": 1.0,
		"fp16": {
			"enabled": true,
			"loss_scale": 0,
			"loss_scale_window": 2000,
			"hysteresis": 2,
			"min_loss_scale": 0.0
		},
		"zero_optimization": {
			"stage": 2,
			"reduce_bucket_size": 50000000,
			"overlap_comm": true
		},
		"sparse_attention": {
			"mode": "fixed",
			"block": 16,
			"different_layout_per_head": true,
			"num_local_blocks": 8,
			"num_global_blocks": 1,
			"attention": "unidirectional",
			"horizontal_global_attention": false,
			"num_different_global_patterns": 8
		}
	}