Spaces:

vedaco
/

AKASHA

Sleeping

App Files Files Community

vedaco committed on Apr 3

Commit

3304074

verified ·

1 Parent(s): a7b3073

Create akasha/utils.py

Browse files

Files changed (1) hide show

akasha/utils.py +127 -0

akasha/utils.py ADDED Viewed

	@@ -0,0 +1,127 @@

+"""
+Utility functions for AKASHA.
+"""
+import tensorflow as tf
+import numpy as np
+import json
+import os
+def load_config(config_path="config.json"):
+    with open(config_path, "r") as f:
+        config = json.load(f)
+    return config
+def create_default_config():
+    return {
+        "model": {
+            "name": "AKASHA",
+            "version": "1.0",
+            "tokenizer": {
+                "image_size": 256,
+                "patch_size": 8,
+                "num_tokens": 1024,
+                "codebook_dim": 256,
+                "encoder_hidden_dims": [64, 128, 256, 512],
+                "decoder_hidden_dims": [512, 256, 128, 64],
+                "commitment_cost": 0.25,
+                "num_residual_blocks": 2,
+            },
+            "transformer": {
+                "num_layers": 24,
+                "d_model": 1024,
+                "num_heads": 16,
+                "d_ff": 4096,
+                "dropout_rate": 0.1,
+                "max_sequence_length": 1024,
+                "vocab_size": 1024,
+                "use_rotary_embeddings": True,
+            },
+            "generation": {
+                "temperature": 0.9,
+                "top_k": 100,
+                "top_p": 0.95,
+            },
+        },
+        "training": {
+            "batch_size": 32,
+            "learning_rate": 3e-4,
+            "warmup_steps": 4000,
+            "total_steps": 500000,
+            "weight_decay": 0.01,
+            "gradient_clip_norm": 1.0,
+            "mixed_precision": True,
+            "stage1": {"epochs": 100, "learning_rate": 1e-4, "batch_size": 64},
+            "stage2": {"epochs": 200, "learning_rate": 3e-4, "batch_size": 32},
+        },
+        "data": {"dataset": "imagenet", "image_size": 256, "augmentation": True},
+        "huggingface": {"repo_id": "vedaco/AKASHA", "space_sdk": "gradio"},
+    }
+class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
+    def __init__(self, base_lr, warmup_steps, total_steps, min_lr=1e-6):
+        super().__init__()
+        self.base_lr = base_lr
+        self.warmup_steps = warmup_steps
+        self.total_steps = total_steps
+        self.min_lr = min_lr
+    def __call__(self, step):
+        step = tf.cast(step, tf.float32)
+        warmup_lr = self.base_lr * (step / self.warmup_steps)
+        progress = (step - self.warmup_steps) / (self.total_steps - self.warmup_steps)
+        progress = tf.clip_by_value(progress, 0.0, 1.0)
+        cosine_lr = self.min_lr + 0.5 * (self.base_lr - self.min_lr) * (
+            1.0 + tf.cos(np.pi * progress)
+        )
+        return tf.where(step < self.warmup_steps, warmup_lr, cosine_lr)
+    def get_config(self):
+        return {
+            "base_lr": self.base_lr,
+            "warmup_steps": self.warmup_steps,
+            "total_steps": self.total_steps,
+            "min_lr": self.min_lr,
+        }
+def save_images_grid(images, filepath, grid_size=None):
+    from PIL import Image as PILImage
+    if isinstance(images, tf.Tensor):
+        images = images.numpy()
+    n = images.shape[0]
+    if grid_size is None:
+        grid_size = int(np.ceil(np.sqrt(n)))
+    h, w = images.shape[1], images.shape[2]
+    grid = np.zeros((grid_size * h, grid_size * w, 3), dtype=np.uint8)
+    for i in range(min(n, grid_size * grid_size)):
+        row, col = i // grid_size, i % grid_size
+        img = (images[i] * 255).clip(0, 255).astype(np.uint8)
+        grid[row * h : (row + 1) * h, col * w : (col + 1) * w] = img
+    PILImage.fromarray(grid).save(filepath)
+    return filepath
+def count_parameters(model):
+    return sum(np.prod(v.shape) for v in model.trainable_variables)
+def get_model_summary(config):
+    tok = config["model"]["tokenizer"]
+    trans = config["model"]["transformer"]
+    grid_size = tok["image_size"] // tok["patch_size"]
+    seq_len = grid_size * grid_size
+    print("=" * 60)
+    print("  AKASHA Model Configuration")
+    print("=" * 60)
+    print(f"  Image Size:      {tok['image_size']}x{tok['image_size']}")
+    print(f"  Patch Size:      {tok['patch_size']}x{tok['patch_size']}")
+    print(f"  Grid Size:       {grid_size}x{grid_size}")
+    print(f"  Sequence Length: {seq_len} tokens")
+    print(f"  Codebook Size:   {tok['num_tokens']}")
+    print(f"  Transformer:     {trans['num_layers']}L / {trans['d_model']}D / {trans['num_heads']}H")
+    print("=" * 60)