""" Utility functions for AKASHA. """ import tensorflow as tf import numpy as np import json import os def load_config(config_path="config.json"): with open(config_path, "r") as f: config = json.load(f) return config def create_default_config(): return { "model": { "name": "AKASHA", "version": "1.0", "tokenizer": { "image_size": 256, "patch_size": 8, "num_tokens": 1024, "codebook_dim": 256, "encoder_hidden_dims": [64, 128, 256, 512], "decoder_hidden_dims": [512, 256, 128, 64], "commitment_cost": 0.25, "num_residual_blocks": 2, }, "transformer": { "num_layers": 24, "d_model": 1024, "num_heads": 16, "d_ff": 4096, "dropout_rate": 0.1, "max_sequence_length": 1024, "vocab_size": 1024, "use_rotary_embeddings": True, }, "generation": { "temperature": 0.9, "top_k": 100, "top_p": 0.95, }, }, "training": { "batch_size": 32, "learning_rate": 3e-4, "warmup_steps": 4000, "total_steps": 500000, "weight_decay": 0.01, "gradient_clip_norm": 1.0, "mixed_precision": True, "stage1": {"epochs": 100, "learning_rate": 1e-4, "batch_size": 64}, "stage2": {"epochs": 200, "learning_rate": 3e-4, "batch_size": 32}, }, "data": {"dataset": "imagenet", "image_size": 256, "augmentation": True}, "huggingface": {"repo_id": "vedaco/AKASHA", "space_sdk": "gradio"}, } class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): def __init__(self, base_lr, warmup_steps, total_steps, min_lr=1e-6): super().__init__() self.base_lr = base_lr self.warmup_steps = warmup_steps self.total_steps = total_steps self.min_lr = min_lr def __call__(self, step): step = tf.cast(step, tf.float32) warmup_lr = self.base_lr * (step / self.warmup_steps) progress = (step - self.warmup_steps) / (self.total_steps - self.warmup_steps) progress = tf.clip_by_value(progress, 0.0, 1.0) cosine_lr = self.min_lr + 0.5 * (self.base_lr - self.min_lr) * ( 1.0 + tf.cos(np.pi * progress) ) return tf.where(step < self.warmup_steps, warmup_lr, cosine_lr) def get_config(self): return { "base_lr": self.base_lr, "warmup_steps": self.warmup_steps, "total_steps": self.total_steps, "min_lr": self.min_lr, } def save_images_grid(images, filepath, grid_size=None): from PIL import Image as PILImage if isinstance(images, tf.Tensor): images = images.numpy() n = images.shape[0] if grid_size is None: grid_size = int(np.ceil(np.sqrt(n))) h, w = images.shape[1], images.shape[2] grid = np.zeros((grid_size * h, grid_size * w, 3), dtype=np.uint8) for i in range(min(n, grid_size * grid_size)): row, col = i // grid_size, i % grid_size img = (images[i] * 255).clip(0, 255).astype(np.uint8) grid[row * h : (row + 1) * h, col * w : (col + 1) * w] = img PILImage.fromarray(grid).save(filepath) return filepath def count_parameters(model): return sum(np.prod(v.shape) for v in model.trainable_variables) def get_model_summary(config): tok = config["model"]["tokenizer"] trans = config["model"]["transformer"] grid_size = tok["image_size"] // tok["patch_size"] seq_len = grid_size * grid_size print("=" * 60) print(" AKASHA Model Configuration") print("=" * 60) print(f" Image Size: {tok['image_size']}x{tok['image_size']}") print(f" Patch Size: {tok['patch_size']}x{tok['patch_size']}") print(f" Grid Size: {grid_size}x{grid_size}") print(f" Sequence Length: {seq_len} tokens") print(f" Codebook Size: {tok['num_tokens']}") print(f" Transformer: {trans['num_layers']}L / {trans['d_model']}D / {trans['num_heads']}H") print("=" * 60)