{ "model_type": "conditional_diffusion", "architecture": "OptimizedConditionedUNet", "task": "image-generation", "framework": "pytorch", "version": "1.0", "model_config": { "in_channels": 3, "out_channels": 3, "attr_dim": 18, "base_channels": 64, "time_embed_dim": 224, "num_layers": 4, "attention_layers": [], "dropout": 0.05, "activation": "silu", "normalization": "group_norm" }, "training_config": { "num_epochs": 110, "batch_size": 16, "learning_rate": 2e-4, "optimizer": "adamw", "weight_decay": 0.01, "gradient_accumulation_steps": 2, "max_grad_norm": 1.0, "mixed_precision": "fp16", "warmup_steps": 200, "lr_scheduler": "cosine_annealing_warm_restarts", "T_0": 20, "eta_min": 1e-6 }, "diffusion_config": { "num_train_timesteps": 1000, "num_inference_steps": 50, "beta_start": 0.00085, "beta_end": 0.012, "beta_schedule": "scaled_linear", "prediction_type": "epsilon", "scheduler_type": "ddpm", "clip_sample": false, "clip_sample_range": 1.0 }, "data_config": { "image_size": 256, "num_channels": 3, "dataset": "cartoonset10k", "validation_split": 0.15, "augmentation": { "horizontal_flip": 0.3, "color_jitter": { "brightness": 0.1, "contrast": 0.1, "saturation": 0.1 }, "rotation": 5, "normalization": { "mean": [0.5, 0.5, 0.5], "std": [0.5, 0.5, 0.5] } } }, "feature_config": { "extractor": "mediapipe", "num_attributes": 18, "attribute_names": [ "eye_angle", "eye_lashes", "eye_lid", "chin_length", "eyebrow_weight", "eyebrow_shape", "eyebrow_thickness", "face_shape", "facial_hair", "hair", "eye_color", "face_color", "hair_color", "glasses", "glasses_color", "eye_slant", "eyebrow_width", "eye_eyebrow_distance" ], "attribute_ranges": { "eye_angle": [0, 2], "eye_lashes": [0, 1], "eye_lid": [0, 1], "chin_length": [0, 2], "eyebrow_weight": [0, 1], "eyebrow_shape": [0, 13], "eyebrow_thickness": [0, 3], "face_shape": [0, 6], "facial_hair": [0, 14], "hair": [0, 110], "eye_color": [0, 4], "face_color": [0, 10], "hair_color": [0, 9], "glasses": [0, 11], "glasses_color": [0, 6], "eye_slant": [0, 2], "eyebrow_width": [0, 2], "eye_eyebrow_distance": [0, 2] }, "normalization": "min_max_01" }, "performance_config": { "inference_time_gpu": "2-3 seconds", "inference_time_cpu": "15-30 seconds", "memory_usage_gpu": "4GB", "memory_usage_cpu": "2GB", "recommended_batch_size_gpu": 8, "recommended_batch_size_cpu": 1 }, "metrics": { "final_training_loss": 0.0234, "best_validation_loss": 0.0251, "training_samples": 8500, "validation_samples": 1500, "total_parameters": "~50M", "training_time": "~10 hours", "hardware": "NVIDIA T4 GPU" }, "requirements": { "python": ">=3.8", "torch": ">=1.13.0", "torchvision": ">=0.14.0", "diffusers": ">=0.21.0", "mediapipe": ">=0.10.9", "opencv-python": ">=4.5.0", "numpy": ">=1.21.0", "pillow": ">=8.0.0", "accelerate": ">=0.20.0" }, "tags": [ "diffusion", "cartoon", "face-generation", "style-transfer", "conditional-generation", "selfie-to-cartoon", "pytorch", "computer-vision", "image-generation", "facial-attributes" ], "license": "mit", "language": "en", "library_name": "diffusers", "pipeline_tag": "image-generation" }