Add Yi syllable diffusion model

Browse files

Files changed (5) hide show

README.md +47 -0
model_index.json +12 -0
scheduler/scheduler_config.json +19 -0
unet/config.json +46 -0
unet/diffusion_pytorch_model.safetensors +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,47 @@

+---
+license: apache-2.0
+tags:
+  - diffusion
+  - unconditional-image-generation
+  - ddpm
+  - diffusers
+  - yi-script
+library_name: diffusers
+pipeline_tag: unconditional-image-generation
+---
+# Yi Syllable Diffusion
+An unconditional **DDPM** that generates images of **Yi script syllables**
+(Unicode code points `U+A000`–`U+A48C` in the Yi Syllables block). Trained on 1,165 glyphs rendered from the
+`NotoSansYi-Regular` font.
+## Usage
+```python
+from diffusers import DDPMPipeline
+pipe = DDPMPipeline.from_pretrained("pratik220704/yi-syllable-diffusion").to("cuda")
+image = pipe(num_inference_steps=50).images[0]
+image.save("yi.png")
+```
+## Training data
+1,165 grayscale 64×64 PNGs, one per Yi syllable, rendered with PIL from
+`NotoSansYi-Regular.ttf`.
+## Training procedure
+- Architecture: `UNet2DModel` (diffusers), 1-channel in/out, ~17 M params.
+- Noise schedule: cosine-beta DDPM (1000 steps) with **zero terminal SNR**.
+- Objective: **v-prediction**.
+- Sampler: `DDIMScheduler`, `timestep_spacing="trailing"`, `clip_sample=True`, 50 steps.
+- Optimizer: AdamW, lr 1e-4, cosine LR schedule with warmup. Epochs: 10.
+
+The zero-SNR + v-prediction recipe is what produces crisp black-on-white glyphs
+(plain epsilon-prediction yields a grey haze). FID (full dataset) ≈ 108.6.
+## Limitations
+Unconditional — you cannot request a specific syllable. Quality is bounded by the
+64 px resolution and short (10-epoch) training budget.
+## License
+Model weights: Apache-2.0. The Noto fonts are licensed under the SIL Open Font License.

model_index.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "_class_name": "DDPMPipeline",
+  "_diffusers_version": "0.31.0",
+  "scheduler": [
+    "diffusers",
+    "DDIMScheduler"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DModel"
+  ]
+}

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "_class_name": "DDIMScheduler",
+  "_diffusers_version": "0.31.0",
+  "beta_end": 0.02,
+  "beta_schedule": "squaredcos_cap_v2",
+  "beta_start": 0.0001,
+  "clip_sample": true,
+  "clip_sample_range": 1.0,
+  "dynamic_thresholding_ratio": 0.995,
+  "num_train_timesteps": 1000,
+  "prediction_type": "v_prediction",
+  "rescale_betas_zero_snr": true,
+  "sample_max_value": 1.0,
+  "set_alpha_to_one": true,
+  "steps_offset": 0,
+  "thresholding": false,
+  "timestep_spacing": "trailing",
+  "trained_betas": null
+}

unet/config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "_class_name": "UNet2DModel",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "runs/full_v/unet",
+  "act_fn": "silu",
+  "add_attention": true,
+  "attention_head_dim": 8,
+  "attn_norm_num_groups": null,
+  "block_out_channels": [
+    64,
+    128,
+    128,
+    256
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "DownBlock2D",
+    "AttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "downsample_type": "conv",
+  "dropout": 0.0,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 1,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "num_train_timesteps": null,
+  "out_channels": 1,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 64,
+  "time_embedding_type": "positional",
+  "up_block_types": [
+    "UpBlock2D",
+    "AttnUpBlock2D",
+    "UpBlock2D",
+    "UpBlock2D"
+  ],
+  "upsample_type": "conv"
+}

unet/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:874d87e631d36dde353bb35e7a6b8a96e1f912d53cfe2a91c57f3e29cdda3511
+size 68897084