Spaces:

vedaco
/

AKASHA

Sleeping

App Files Files Community

AKASHA / akasha /utils.py

vedaco

Create akasha/utils.py

3304074 verified 2 months ago

raw

history blame contribute delete

4.34 kB

	"""
	Utility functions for AKASHA.
	"""

	import tensorflow as tf
	import numpy as np
	import json
	import os


	def load_config(config_path="config.json"):
	    """Read a JSON configuration file and return its parsed contents.

	    Args:
	        config_path: Path of the JSON file to read. Defaults to
	            ``"config.json"`` relative to the current working directory.

	    Returns:
	        The object produced by ``json.load`` for the file's contents.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the file does not contain valid JSON.
	    """
	    # Decode explicitly as UTF-8 instead of relying on the platform's
	    # default encoding, which is not UTF-8 everywhere.
	    with open(config_path, "r", encoding="utf-8") as f:
	        return json.load(f)


	def create_default_config():
	    """Assemble the default AKASHA configuration.

	    A new dictionary (with new nested dictionaries and lists) is built on
	    every call, so callers may mutate the result freely.

	    Returns:
	        A dict with the top-level sections ``"model"``, ``"training"``,
	        ``"data"`` and ``"huggingface"``.
	    """
	    tokenizer_section = {
	        "image_size": 256,
	        "patch_size": 8,
	        "num_tokens": 1024,
	        "codebook_dim": 256,
	        "encoder_hidden_dims": [64, 128, 256, 512],
	        "decoder_hidden_dims": [512, 256, 128, 64],
	        "commitment_cost": 0.25,
	        "num_residual_blocks": 2,
	    }
	    transformer_section = {
	        "num_layers": 24,
	        "d_model": 1024,
	        "num_heads": 16,
	        "d_ff": 4096,
	        "dropout_rate": 0.1,
	        "max_sequence_length": 1024,
	        "vocab_size": 1024,
	        "use_rotary_embeddings": True,
	    }
	    generation_section = {
	        "temperature": 0.9,
	        "top_k": 100,
	        "top_p": 0.95,
	    }
	    model_section = {
	        "name": "AKASHA",
	        "version": "1.0",
	        "tokenizer": tokenizer_section,
	        "transformer": transformer_section,
	        "generation": generation_section,
	    }

	    # Global training defaults first, then the per-stage overrides.
	    training_section = {
	        "batch_size": 32,
	        "learning_rate": 3e-4,
	        "warmup_steps": 4000,
	        "total_steps": 500000,
	        "weight_decay": 0.01,
	        "gradient_clip_norm": 1.0,
	        "mixed_precision": True,
	    }
	    training_section["stage1"] = {
	        "epochs": 100,
	        "learning_rate": 1e-4,
	        "batch_size": 64,
	    }
	    training_section["stage2"] = {
	        "epochs": 200,
	        "learning_rate": 3e-4,
	        "batch_size": 32,
	    }

	    default_config = {}
	    default_config["model"] = model_section
	    default_config["training"] = training_section
	    default_config["data"] = {
	        "dataset": "imagenet",
	        "image_size": 256,
	        "augmentation": True,
	    }
	    default_config["huggingface"] = {
	        "repo_id": "vedaco/AKASHA",
	        "space_sdk": "gradio",
	    }
	    return default_config


	class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
	    """Learning-rate schedule: linear warmup, then cosine decay.

	    For ``step < warmup_steps`` the rate is
	    ``base_lr * step / warmup_steps``. Afterwards it is
	    ``min_lr + 0.5 * (base_lr - min_lr) * (1 + cos(pi * progress))`` where
	    ``progress = (step - warmup_steps) / (total_steps - warmup_steps)``
	    clipped to ``[0, 1]``.
	    """

	    def __init__(self, base_lr, warmup_steps, total_steps, min_lr=1e-6):
	        """Store the schedule parameters.

	        Args:
	            base_lr: Rate reached at the end of warmup.
	            warmup_steps: Number of steps over which the rate ramps up.
	            total_steps: Step at which the cosine decay reaches ``min_lr``.
	            min_lr: Rate at the end of the cosine decay.
	        """
	        super().__init__()
	        self.base_lr = base_lr
	        self.warmup_steps = warmup_steps
	        self.total_steps = total_steps
	        self.min_lr = min_lr

	    def __call__(self, step):
	        """Return the learning rate for ``step`` as a float32 tensor."""
	        step = tf.cast(step, tf.float32)
	        warmup_steps = tf.cast(self.warmup_steps, tf.float32)
	        decay_steps = tf.cast(self.total_steps - self.warmup_steps, tf.float32)

	        # divide_no_nan returns 0 for a zero denominator. A plain division
	        # gives 0/0 = NaN at step == warmup_steps when total_steps equals
	        # warmup_steps, and inf/NaN in the warmup branch when
	        # warmup_steps == 0.
	        warmup_lr = self.base_lr * tf.math.divide_no_nan(step, warmup_steps)
	        progress = tf.math.divide_no_nan(step - warmup_steps, decay_steps)
	        # Clamp so steps before warmup ends or past total_steps stay in range.
	        progress = tf.clip_by_value(progress, 0.0, 1.0)
	        cosine_lr = self.min_lr + 0.5 * (self.base_lr - self.min_lr) * (
	            1.0 + tf.cos(np.pi * progress)
	        )
	        return tf.where(step < warmup_steps, warmup_lr, cosine_lr)

	    def get_config(self):
	        """Return the constructor arguments as a dict, keyed by argument name."""
	        return {
	            "base_lr": self.base_lr,
	            "warmup_steps": self.warmup_steps,
	            "total_steps": self.total_steps,
	            "min_lr": self.min_lr,
	        }


	def save_images_grid(images, filepath, grid_size=None):
	    """Tile a batch of images into one square RGB grid and save it.

	    Pixel values are multiplied by 255, clipped to ``[0, 255]`` and cast to
	    ``uint8`` before being written, so inputs are presumably expected to be
	    floats in ``[0, 1]`` (NOTE(review): confirm against callers).

	    Args:
	        images: Batch indexed as ``(N, H, W, ...)``. May be a ``tf.Tensor``,
	            a NumPy array, or any array-like ``np.asarray`` accepts. Each
	            image must be assignable to an ``(H, W, 3)`` tile.
	        filepath: Destination path passed to ``PIL.Image.save``.
	        grid_size: Number of tiles per side. Defaults to
	            ``ceil(sqrt(N))``. Images beyond ``grid_size ** 2`` are dropped.

	    Returns:
	        ``filepath``, unchanged.
	    """
	    from PIL import Image as PILImage

	    if isinstance(images, tf.Tensor):
	        images = images.numpy()
	    # Accept lists or other array-likes, which have no .shape of their own.
	    images = np.asarray(images)

	    n = images.shape[0]
	    if grid_size is None:
	        grid_size = int(np.ceil(np.sqrt(n)))
	    h, w = images.shape[1], images.shape[2]

	    # Unfilled tiles stay black because the canvas starts as zeros.
	    grid = np.zeros((grid_size * h, grid_size * w, 3), dtype=np.uint8)
	    for i in range(min(n, grid_size * grid_size)):
	        row, col = divmod(i, grid_size)
	        img = (images[i] * 255).clip(0, 255).astype(np.uint8)
	        grid[row * h : (row + 1) * h, col * w : (col + 1) * w] = img
	    PILImage.fromarray(grid).save(filepath)
	    return filepath


	def count_parameters(model):
	    """Return the total number of elements in the model's trainable variables.

	    Args:
	        model: Object exposing ``trainable_variables``, an iterable of
	            objects that each have a ``shape`` attribute.

	    Returns:
	        The element count as a plain ``int`` (0 when there are no
	        trainable variables).
	    """
	    # np.prod of an empty shape is the float 1.0 by default, which would
	    # turn the whole total into a float; force an integer product.
	    return sum(
	        int(np.prod(v.shape, dtype=np.int64)) for v in model.trainable_variables
	    )


	def get_model_summary(config):
	    """Print a short banner describing the model configuration.

	    Reads ``config["model"]["tokenizer"]`` and
	    ``config["model"]["transformer"]``, derives the patch grid size
	    (``image_size // patch_size``) and the sequence length (grid size
	    squared), and writes the summary to standard output.

	    Args:
	        config: Nested configuration mapping containing the keys above.

	    Returns:
	        None.
	    """
	    tokenizer_cfg = config["model"]["tokenizer"]
	    transformer_cfg = config["model"]["transformer"]
	    patches_per_side = tokenizer_cfg["image_size"] // tokenizer_cfg["patch_size"]
	    token_count = patches_per_side * patches_per_side
	    rule = "=" * 60

	    def _report_lines():
	        # Generated lazily so each config lookup happens right before its
	        # line is printed.
	        yield rule
	        yield " AKASHA Model Configuration"
	        yield rule
	        yield f" Image Size: {tokenizer_cfg['image_size']}x{tokenizer_cfg['image_size']}"
	        yield f" Patch Size: {tokenizer_cfg['patch_size']}x{tokenizer_cfg['patch_size']}"
	        yield f" Grid Size: {patches_per_side}x{patches_per_side}"
	        yield f" Sequence Length: {token_count} tokens"
	        yield f" Codebook Size: {tokenizer_cfg['num_tokens']}"
	        yield (
	            f" Transformer: {transformer_cfg['num_layers']}L"
	            f" / {transformer_cfg['d_model']}D"
	            f" / {transformer_cfg['num_heads']}H"
	        )
	        yield rule

	    for line in _report_lines():
	        print(line)