| """ |
| Utility functions for AKASHA. |
| """ |
|
|
| import tensorflow as tf |
| import numpy as np |
| import json |
| import os |
|
|
|
|
def load_config(config_path="config.json"):
    """Load a JSON configuration file and return its parsed contents.

    Args:
        config_path: Path of the JSON file to read. Defaults to
            ``"config.json"``, resolved against the current working
            directory.

    Returns:
        The decoded top-level JSON value, exactly as produced by
        ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON files are UTF-8 by specification; pinning the encoding keeps
    # non-ASCII values from being decoded with the platform locale.
    with open(config_path, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
def create_default_config():
    """Build the default AKASHA configuration.

    Returns:
        A newly created nested dict with the top-level sections
        ``"model"``, ``"training"``, ``"data"`` and ``"huggingface"``.
        Every call builds fresh containers, so the result can be mutated
        without affecting later calls.
    """
    # Settings of the image tokenizer.
    tokenizer = dict(
        image_size=256,
        patch_size=8,
        num_tokens=1024,
        codebook_dim=256,
        encoder_hidden_dims=[64, 128, 256, 512],
        decoder_hidden_dims=[512, 256, 128, 64],
        commitment_cost=0.25,
        num_residual_blocks=2,
    )

    # Settings of the transformer.
    transformer = dict(
        num_layers=24,
        d_model=1024,
        num_heads=16,
        d_ff=4096,
        dropout_rate=0.1,
        max_sequence_length=1024,
        vocab_size=1024,
        use_rotary_embeddings=True,
    )

    # Sampling defaults.
    generation = dict(temperature=0.9, top_k=100, top_p=0.95)

    model = dict(
        name="AKASHA",
        version="1.0",
        tokenizer=tokenizer,
        transformer=transformer,
        generation=generation,
    )

    # Global optimisation settings plus per-stage overrides.
    training = dict(
        batch_size=32,
        learning_rate=3e-4,
        warmup_steps=4000,
        total_steps=500000,
        weight_decay=0.01,
        gradient_clip_norm=1.0,
        mixed_precision=True,
        stage1=dict(epochs=100, learning_rate=1e-4, batch_size=64),
        stage2=dict(epochs=200, learning_rate=3e-4, batch_size=32),
    )

    data = dict(dataset="imagenet", image_size=256, augmentation=True)
    huggingface = dict(repo_id="vedaco/AKASHA", space_sdk="gradio")

    return dict(
        model=model,
        training=training,
        data=data,
        huggingface=huggingface,
    )
|
|
|
|
class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule: linear warmup followed by cosine decay.

    While ``step < warmup_steps`` the rate is
    ``base_lr * step / warmup_steps``. From ``warmup_steps`` onwards it
    follows half a cosine period from ``base_lr`` down to ``min_lr``,
    reaching ``min_lr`` at ``total_steps`` and staying there afterwards.
    """

    def __init__(self, base_lr, warmup_steps, total_steps, min_lr=1e-6):
        """Store the schedule parameters.

        Args:
            base_lr: Peak learning rate, reached at the end of warmup.
            warmup_steps: Number of steps of linear warmup.
            total_steps: Step at which the cosine decay reaches ``min_lr``.
            min_lr: Final learning rate after the decay.
        """
        super().__init__()
        self.base_lr = base_lr
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.min_lr = min_lr

    def __call__(self, step):
        """Return the learning rate for ``step`` as a float32 tensor."""
        step = tf.cast(step, tf.float32)

        # Floor both denominators at one step. Without this,
        # total_steps == warmup_steps gives 0/0 = NaN at step == warmup_steps
        # (a NaN learning rate), and warmup_steps == 0 puts inf/NaN into the
        # warmup branch. Spans of one step or more are unaffected.
        warmup_span = tf.maximum(tf.cast(self.warmup_steps, tf.float32), 1.0)
        decay_span = tf.maximum(
            tf.cast(self.total_steps - self.warmup_steps, tf.float32), 1.0
        )

        warmup_lr = self.base_lr * (step / warmup_span)

        # Fraction of the decay phase completed, clamped so the rate stays
        # at base_lr before the decay starts and at min_lr after it ends.
        progress = tf.clip_by_value(
            (step - self.warmup_steps) / decay_span, 0.0, 1.0
        )
        cosine_lr = self.min_lr + 0.5 * (self.base_lr - self.min_lr) * (
            1.0 + tf.cos(np.pi * progress)
        )
        return tf.where(step < self.warmup_steps, warmup_lr, cosine_lr)

    def get_config(self):
        """Return the constructor arguments as a dict, keyed by parameter name."""
        return {
            "base_lr": self.base_lr,
            "warmup_steps": self.warmup_steps,
            "total_steps": self.total_steps,
            "min_lr": self.min_lr,
        }
|
|
|
|
def save_images_grid(images, filepath, grid_size=None):
    """Tile a batch of images into a square grid and save it as one file.

    Args:
        images: Batch of images indexed as ``(n, height, width, ...)``.
            May be a ``tf.Tensor`` or anything ``np.asarray`` can convert
            (NumPy array, ``tf.Variable``, nested list). Each image is
            multiplied by 255 and clipped to ``[0, 255]`` before being
            written, so values are presumably expected in ``[0, 1]``
            (TODO confirm with callers). The grid has three channels, so
            each image must be assignable to an ``(height, width, 3)``
            slice.
        filepath: Destination passed to ``PIL.Image.Image.save``.
        grid_size: Number of tiles per row and per column. Defaults to
            ``ceil(sqrt(n))``. Images beyond ``grid_size ** 2`` are left
            out; unused tiles stay black.

    Returns:
        ``filepath``, unchanged.
    """
    from PIL import Image as PILImage

    if isinstance(images, tf.Tensor):
        images = images.numpy()
    # Only tf.Tensor used to be converted, so tf.Variable and list inputs
    # failed on the .shape / .clip calls below. np.asarray is a no-op for
    # arrays and converts the remaining array-likes.
    images = np.asarray(images)

    n = images.shape[0]
    if grid_size is None:
        grid_size = int(np.ceil(np.sqrt(n)))
    h, w = images.shape[1], images.shape[2]

    # Black canvas; tiles are filled row by row.
    grid = np.zeros((grid_size * h, grid_size * w, 3), dtype=np.uint8)
    for i in range(min(n, grid_size * grid_size)):
        row, col = divmod(i, grid_size)
        img = (images[i] * 255).clip(0, 255).astype(np.uint8)
        grid[row * h : (row + 1) * h, col * w : (col + 1) * w] = img

    PILImage.fromarray(grid).save(filepath)
    return filepath
|
|
|
|
def count_parameters(model):
    """Return the total number of elements in a model's trainable variables.

    Args:
        model: Object exposing ``trainable_variables``, an iterable of
            variables that each have a ``shape``.

    Returns:
        The summed element count as a Python ``int`` (``0`` when there are
        no trainable variables).
    """
    # np.prod of an empty shape is the float 1.0, which used to turn the
    # whole sum into a float for models with scalar variables. Converting
    # each product to int keeps the count an exact integer.
    return sum(int(np.prod(v.shape)) for v in model.trainable_variables)
|
|
|
|
def get_model_summary(config):
    """Print a short summary of the model configuration to stdout.

    Args:
        config: Nested dict containing ``config["model"]["tokenizer"]``
            (keys ``image_size``, ``patch_size``, ``num_tokens``) and
            ``config["model"]["transformer"]`` (keys ``num_layers``,
            ``d_model``, ``num_heads``).

    Returns:
        None. The summary is only printed.
    """
    tokenizer_cfg = config["model"]["tokenizer"]
    transformer_cfg = config["model"]["transformer"]

    # Patches per image side, and the resulting token count per image.
    patches_per_side = tokenizer_cfg["image_size"] // tokenizer_cfg["patch_size"]
    token_count = patches_per_side * patches_per_side
    rule = "=" * 60

    def _summary_lines():
        # Lines are produced lazily so each one is printed as soon as it
        # has been formatted.
        yield rule
        yield " AKASHA Model Configuration"
        yield rule
        yield f" Image Size: {tokenizer_cfg['image_size']}x{tokenizer_cfg['image_size']}"
        yield f" Patch Size: {tokenizer_cfg['patch_size']}x{tokenizer_cfg['patch_size']}"
        yield f" Grid Size: {patches_per_side}x{patches_per_side}"
        yield f" Sequence Length: {token_count} tokens"
        yield f" Codebook Size: {tokenizer_cfg['num_tokens']}"
        yield (
            f" Transformer: {transformer_cfg['num_layers']}L / "
            f"{transformer_cfg['d_model']}D / {transformer_cfg['num_heads']}H"
        )
        yield rule

    for line in _summary_lines():
        print(line)