Spaces:

Text-to-Document-Generation
/

Docgenie-API

Paused

Docgenie-API/data/models/handwriting/config.yaml

Ahadhassan-2003

deploy: update HF Space

dc4e6da 27 days ago

2.01 kB

	data:
	batch_size: 64
	num_workers: 8
	train_lmdb_path: ./iam_lmdbclear
	vocab_path: ./char_vocab.json
	model:
	latent_shape:
	- 4
	- 16
	- 64
	scheduler:
	beta_end: 0.012
	beta_schedule: linear
	beta_start: 0.00085
	num_train_timesteps: 1000
	prediction_type: epsilon
	text_encoder:
	d_ff: 1024
	d_model: 512
	dropout: 0.1
	max_length: 32
	num_heads: 8
	num_layers: 4
	output_dim: 512
	unet:
	act_fn: silu
	attention_head_dim: 8
	block_out_channels:
	- 192
	- 384
	- 768
	- 768
	cross_attention_dim: 512
	down_block_types:
	- DownBlock2D
	- CrossAttnDownBlock2D
	- CrossAttnDownBlock2D
	- DownBlock2D
	in_channels: 4
	layers_per_block: 2
	mid_block_type: UNetMidBlock2DCrossAttn
	norm_num_groups: 32
	num_class_embeds: 657
	out_channels: 4
	sample_size:
	- 16
	- 64
	up_block_types:
	- UpBlock2D
	- CrossAttnUpBlock2D
	- CrossAttnUpBlock2D
	- UpBlock2D
	vae:
	model_name: stabilityai/sd-vae-ft-mse
	training:
	compile_model: false
	ema_decay: 0.999
	ema_inv_gamma: 1.0
	ema_min_decay: 0.0
	ema_power: 1.0
	gradient_accumulation_steps: 1
	log_every_n_steps: 10
	lr_scheduler:
	min_lr: 1.0e-07
	type: cosine
	warmup_steps: 2000
	max_grad_norm: 1.0
	mixed_precision: bf16
	mode: latent
	num_epochs: 300
	num_inference_steps: 1000
	optimizer:
	beta1: 0.9
	beta2: 0.999
	eps: 1.0e-08
	lr: 0.0001
	type: adamw
	weight_decay: 0.01
	output_dir: ./experiments/hf_conditional_latent_batch64
	resume_from_checkpoint: null
	run_name: hf_conditional_latent_batch64
	sample_every_n_steps: 18000
	save_every_n_epochs: 10
	seed: 42
	use_channels_last: false
	use_ema: true
	wandb:
	api_key:
	entity: null
	notes: Hugging Face UNet with EMA and latent diffusion training.
	project: handwriting-diffusion
	tags:
	- hf
	- conditional
	- latent