{
  "model_type": "conditional_diffusion",
  "architecture": "OptimizedConditionedUNet",
  "task": "image-generation",
  "framework": "pytorch",
  "version": "1.0",
  
  "model_config": {
    "in_channels": 3,
    "out_channels": 3,
    "attr_dim": 18,
    "base_channels": 64,
    "time_embed_dim": 224,
    "num_layers": 4,
    "attention_layers": [],
    "dropout": 0.05,
    "activation": "silu",
    "normalization": "group_norm"
  },
  
  "training_config": {
    "num_epochs": 110,
    "batch_size": 16,
    "learning_rate": 2e-4,
    "optimizer": "adamw",
    "weight_decay": 0.01,
    "gradient_accumulation_steps": 2,
    "max_grad_norm": 1.0,
    "mixed_precision": "fp16",
    "warmup_steps": 200,
    "lr_scheduler": "cosine_annealing_warm_restarts",
    "T_0": 20,
    "eta_min": 1e-6
  },
  
  "diffusion_config": {
    "num_train_timesteps": 1000,
    "num_inference_steps": 50,
    "beta_start": 0.00085,
    "beta_end": 0.012,
    "beta_schedule": "scaled_linear",
    "prediction_type": "epsilon",
    "scheduler_type": "ddpm",
    "clip_sample": false,
    "clip_sample_range": 1.0
  },
  
  "data_config": {
    "image_size": 256,
    "num_channels": 3,
    "dataset": "cartoonset10k",
    "validation_split": 0.15,
    "augmentation": {
      "horizontal_flip": 0.3,
      "color_jitter": {
        "brightness": 0.1,
        "contrast": 0.1,
        "saturation": 0.1
      },
      "rotation": 5,
      "normalization": {
        "mean": [0.5, 0.5, 0.5],
        "std": [0.5, 0.5, 0.5]
      }
    }
  },
  
  "feature_config": {
    "extractor": "mediapipe",
    "num_attributes": 18,
    "attribute_names": [
      "eye_angle",
      "eye_lashes",
      "eye_lid",
      "chin_length",
      "eyebrow_weight",
      "eyebrow_shape",
      "eyebrow_thickness",
      "face_shape",
      "facial_hair",
      "hair",
      "eye_color",
      "face_color",
      "hair_color",
      "glasses",
      "glasses_color",
      "eye_slant",
      "eyebrow_width",
      "eye_eyebrow_distance"
    ],
    "attribute_ranges": {
      "eye_angle": [0, 2],
      "eye_lashes": [0, 1],
      "eye_lid": [0, 1],
      "chin_length": [0, 2],
      "eyebrow_weight": [0, 1],
      "eyebrow_shape": [0, 13],
      "eyebrow_thickness": [0, 3],
      "face_shape": [0, 6],
      "facial_hair": [0, 14],
      "hair": [0, 110],
      "eye_color": [0, 4],
      "face_color": [0, 10],
      "hair_color": [0, 9],
      "glasses": [0, 11],
      "glasses_color": [0, 6],
      "eye_slant": [0, 2],
      "eyebrow_width": [0, 2],
      "eye_eyebrow_distance": [0, 2]
    },
    "normalization": "min_max_01"
  },
  
  "performance_config": {
    "inference_time_gpu": "2-3 seconds",
    "inference_time_cpu": "15-30 seconds",
    "memory_usage_gpu": "4GB",
    "memory_usage_cpu": "2GB",
    "recommended_batch_size_gpu": 8,
    "recommended_batch_size_cpu": 1
  },
  
  "metrics": {
    "final_training_loss": 0.0234,
    "best_validation_loss": 0.0251,
    "training_samples": 8500,
    "validation_samples": 1500,
    "total_parameters": "~50M",
    "training_time": "~10 hours",
    "hardware": "NVIDIA T4 GPU"
  },
  
  "requirements": {
    "python": ">=3.8",
    "torch": ">=1.13.0",
    "torchvision": ">=0.14.0",
    "diffusers": ">=0.21.0",
    "mediapipe": ">=0.10.9",
    "opencv-python": ">=4.5.0",
    "numpy": ">=1.21.0",
    "pillow": ">=8.0.0",
    "accelerate": ">=0.20.0"
  },
  
  "tags": [
    "diffusion",
    "cartoon",
    "face-generation",
    "style-transfer",
    "conditional-generation",
    "selfie-to-cartoon",
    "pytorch",
    "computer-vision",
    "image-generation",
    "facial-attributes"
  ],
  
  "license": "mit",
  "language": "en",
  "library_name": "diffusers",
  "pipeline_tag": "image-generation"
}