image_to_cartoonify / config.json
wizcodes12's picture
Create config.json
e7747da verified
{
"model_type": "conditional_diffusion",
"architecture": "OptimizedConditionedUNet",
"task": "image-generation",
"framework": "pytorch",
"version": "1.0",
"model_config": {
"in_channels": 3,
"out_channels": 3,
"attr_dim": 18,
"base_channels": 64,
"time_embed_dim": 224,
"num_layers": 4,
"attention_layers": [],
"dropout": 0.05,
"activation": "silu",
"normalization": "group_norm"
},
"training_config": {
"num_epochs": 110,
"batch_size": 16,
"learning_rate": 2e-4,
"optimizer": "adamw",
"weight_decay": 0.01,
"gradient_accumulation_steps": 2,
"max_grad_norm": 1.0,
"mixed_precision": "fp16",
"warmup_steps": 200,
"lr_scheduler": "cosine_annealing_warm_restarts",
"T_0": 20,
"eta_min": 1e-6
},
"diffusion_config": {
"num_train_timesteps": 1000,
"num_inference_steps": 50,
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"prediction_type": "epsilon",
"scheduler_type": "ddpm",
"clip_sample": false,
"clip_sample_range": 1.0
},
"data_config": {
"image_size": 256,
"num_channels": 3,
"dataset": "cartoonset10k",
"validation_split": 0.15,
"augmentation": {
"horizontal_flip": 0.3,
"color_jitter": {
"brightness": 0.1,
"contrast": 0.1,
"saturation": 0.1
},
"rotation": 5,
"normalization": {
"mean": [0.5, 0.5, 0.5],
"std": [0.5, 0.5, 0.5]
}
}
},
"feature_config": {
"extractor": "mediapipe",
"num_attributes": 18,
"attribute_names": [
"eye_angle",
"eye_lashes",
"eye_lid",
"chin_length",
"eyebrow_weight",
"eyebrow_shape",
"eyebrow_thickness",
"face_shape",
"facial_hair",
"hair",
"eye_color",
"face_color",
"hair_color",
"glasses",
"glasses_color",
"eye_slant",
"eyebrow_width",
"eye_eyebrow_distance"
],
"attribute_ranges": {
"eye_angle": [0, 2],
"eye_lashes": [0, 1],
"eye_lid": [0, 1],
"chin_length": [0, 2],
"eyebrow_weight": [0, 1],
"eyebrow_shape": [0, 13],
"eyebrow_thickness": [0, 3],
"face_shape": [0, 6],
"facial_hair": [0, 14],
"hair": [0, 110],
"eye_color": [0, 4],
"face_color": [0, 10],
"hair_color": [0, 9],
"glasses": [0, 11],
"glasses_color": [0, 6],
"eye_slant": [0, 2],
"eyebrow_width": [0, 2],
"eye_eyebrow_distance": [0, 2]
},
"normalization": "min_max_01"
},
"performance_config": {
"inference_time_gpu": "2-3 seconds",
"inference_time_cpu": "15-30 seconds",
"memory_usage_gpu": "4GB",
"memory_usage_cpu": "2GB",
"recommended_batch_size_gpu": 8,
"recommended_batch_size_cpu": 1
},
"metrics": {
"final_training_loss": 0.0234,
"best_validation_loss": 0.0251,
"training_samples": 8500,
"validation_samples": 1500,
"total_parameters": "~50M",
"training_time": "~10 hours",
"hardware": "NVIDIA T4 GPU"
},
"requirements": {
"python": ">=3.8",
"torch": ">=1.13.0",
"torchvision": ">=0.14.0",
"diffusers": ">=0.21.0",
"mediapipe": ">=0.10.9",
"opencv-python": ">=4.5.0",
"numpy": ">=1.21.0",
"pillow": ">=8.0.0",
"accelerate": ">=0.20.0"
},
"tags": [
"diffusion",
"cartoon",
"face-generation",
"style-transfer",
"conditional-generation",
"selfie-to-cartoon",
"pytorch",
"computer-vision",
"image-generation",
"facial-attributes"
],
"license": "mit",
"language": "en",
"library_name": "diffusers",
"pipeline_tag": "image-to-image"
}