Create model.py

model.py
ADDED
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

class PositionalEncoding(layers.Layer):
    """Positional encoding layer for transformer"""

    def __init__(self, max_length: int, d_model: int, **kwargs):
        super().__init__(**kwargs)
        self.max_length = max_length
        self.d_model = d_model

        # Create positional encoding matrix
        position = np.arange(max_length)[:, np.newaxis]
        div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))

        pe = np.zeros((max_length, d_model))
        pe[:, 0::2] = np.sin(position * div_term)
        pe[:, 1::2] = np.cos(position * div_term)

        self.positional_encoding = tf.constant(pe, dtype=tf.float32)

    def call(self, x):
        seq_length = tf.shape(x)[1]
        return x + self.positional_encoding[:seq_length, :]

    def get_config(self):
        config = super().get_config()
        config.update({
            'max_length': self.max_length,
            'd_model': self.d_model
        })
        return config

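# For reference, the layer above implements the standard sinusoidal encoding
# from "Attention Is All You Need":
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# div_term computes 10000^(-2i / d_model) via exp/log for numerical stability.
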
class TransformerBlock(layers.Layer):
    """Transformer decoder block"""

    def __init__(self, d_model: int, num_heads: int, ff_dim: int,
                 dropout_rate: float = 0.1, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate

        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=d_model // num_heads,
            dropout=dropout_rate
        )
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation='gelu'),
            layers.Dropout(dropout_rate),
            layers.Dense(d_model),
            layers.Dropout(dropout_rate)
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout = layers.Dropout(dropout_rate)

    def call(self, x, training=False, mask=None):
        # Causal self-attention (causality comes from the mask passed in)
        attn_output = self.attention(
            query=x,
            value=x,
            key=x,
            attention_mask=mask,
            training=training
        )
        attn_output = self.dropout(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        # Feed-forward network with residual connection
        ffn_output = self.ffn(out1, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'dropout_rate': self.dropout_rate
        })
        return config

class VedaProgrammingLLM(keras.Model):
    """Veda Programming Language Model"""

    def __init__(
        self,
        vocab_size: int,
        max_length: int = 512,
        d_model: int = 256,
        num_heads: int = 8,
        num_layers: int = 6,
        ff_dim: int = 1024,
        dropout_rate: float = 0.1,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.vocab_size = vocab_size
        self.max_length = max_length
        self.d_model = d_model
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate

        # Embedding layers
        self.token_embedding = layers.Embedding(
            input_dim=vocab_size,
            output_dim=d_model
        )
        self.positional_encoding = PositionalEncoding(max_length, d_model)
        self.dropout = layers.Dropout(dropout_rate)

        # Transformer blocks
        self.transformer_blocks = [
            TransformerBlock(d_model, num_heads, ff_dim, dropout_rate)
            for _ in range(num_layers)
        ]

        # Output projection to vocabulary logits (no softmax here; pair
        # with a from_logits loss during training)
        self.output_layer = layers.Dense(vocab_size)

    def _create_causal_mask(self, seq_length):
        """Create a lower-triangular causal attention mask."""
        mask = tf.linalg.band_part(
            tf.ones((seq_length, seq_length)), -1, 0
        )
        return mask

    def call(self, inputs, training=False):
        seq_length = tf.shape(inputs)[1]

        # Create causal mask so each position attends only to earlier ones
        mask = self._create_causal_mask(seq_length)

        # Embeddings, scaled by sqrt(d_model) as in the original transformer
        x = self.token_embedding(inputs)
        x = x * tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x = self.positional_encoding(x)
        x = self.dropout(x, training=training)

        # Transformer blocks
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x, training=training, mask=mask)

        # Output projection
        logits = self.output_layer(x)
        return logits

    def generate(
        self,
        prompt_tokens: list,
        max_new_tokens: int = 100,
        temperature: float = 0.7,
        top_k: int = 50,
        top_p: float = 0.9
    ):
        """Generate code given a prompt"""
        generated = list(prompt_tokens)

        for _ in range(max_new_tokens):
            # Truncate the context if it exceeds the model's max length
            context = generated[-self.max_length:]

            # Get next-token logits, scaled by the temperature
            input_tensor = tf.expand_dims(context, 0)
            logits = self(input_tensor, training=False)
            next_token_logits = logits[0, -1, :] / temperature

            # Apply top-k filtering
            if top_k > 0:
                top_k_logits, _ = tf.math.top_k(
                    next_token_logits, k=min(top_k, self.vocab_size)
                )
                # Mask out every logit strictly below the k-th largest
                indices_to_remove = tf.less(
                    next_token_logits,
                    top_k_logits[-1]
                )
                next_token_logits = tf.where(
                    indices_to_remove,
                    tf.ones_like(next_token_logits) * float('-inf'),
                    next_token_logits
                )
+
|
| 195 |
+
# Apply top-p (nucleus) filtering
|
| 196 |
+
if top_p < 1.0:
|
| 197 |
+
sorted_logits = tf.sort(next_token_logits, direction='DESCENDING')
|
| 198 |
+
sorted_probs = tf.nn.softmax(sorted_logits)
|
| 199 |
+
cumulative_probs = tf.cumsum(sorted_probs)
|
| 200 |
+
|
| 201 |
+
# Find cutoff
|
| 202 |
+
sorted_indices_to_remove = cumulative_probs > top_p
|
| 203 |
+
sorted_indices_to_remove = tf.concat([
|
| 204 |
+
[False],
|
| 205 |
+
sorted_indices_to_remove[:-1]
|
| 206 |
+
], axis=0)
|
| 207 |
+
|
| 208 |
+
sorted_logits = tf.where(
|
| 209 |
+
sorted_indices_to_remove,
|
| 210 |
+
tf.ones_like(sorted_logits) * float('-inf'),
|
| 211 |
+
sorted_logits
|
| 212 |
+
)
|
| 213 |
+
|
| 214 |
+
# Sample from distribution
|
| 215 |
+
probs = tf.nn.softmax(next_token_logits)
|
| 216 |
+
next_token = tf.random.categorical(
|
| 217 |
+
tf.expand_dims(next_token_logits, 0),
|
| 218 |
+
num_samples=1
|
| 219 |
+
)[0, 0]
|
| 220 |
+
|
| 221 |
+
generated.append(int(next_token.numpy()))
|
| 222 |
+
|
| 223 |
+
# Stop if end token
|
| 224 |
+
if next_token == 3: # END token
|
| 225 |
+
break
|
| 226 |
+
|
| 227 |
+
return generated
|
| 228 |
+
|
| 229 |
+
def get_config(self):
|
| 230 |
+
return {
|
| 231 |
+
'vocab_size': self.vocab_size,
|
| 232 |
+
'max_length': self.max_length,
|
| 233 |
+
'd_model': self.d_model,
|
| 234 |
+
'num_heads': self.num_heads,
|
| 235 |
+
'num_layers': self.num_layers,
|
| 236 |
+
'ff_dim': self.ff_dim,
|
| 237 |
+
'dropout_rate': self.dropout_rate
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
@classmethod
|
| 241 |
+
def from_config(cls, config):
|
| 242 |
+
return cls(**config)
|
| 243 |
+
|
| 244 |
+
|
def create_veda_model(
    vocab_size: int,
    max_length: int = 512,
    model_size: str = "small"
) -> VedaProgrammingLLM:
    """Factory function to create Veda Programming model"""

    configs = {
        "small": {
            "d_model": 256,
            "num_heads": 4,
            "num_layers": 4,
            "ff_dim": 512
        },
        "medium": {
            "d_model": 512,
            "num_heads": 8,
            "num_layers": 6,
            "ff_dim": 1024
        },
        "large": {
            "d_model": 768,
            "num_heads": 12,
            "num_layers": 12,
            "ff_dim": 2048
        }
    }

    config = configs.get(model_size, configs["small"])

    model = VedaProgrammingLLM(
        vocab_size=vocab_size,
        max_length=max_length,
        **config
    )

    return model
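
A minimal usage sketch for the module above, assuming this file is importable as model and that a tokenizer producing the integer ids lives elsewhere in the Space; the vocabulary size, optimizer settings, and prompt ids below are illustrative, not values defined by this file. Since the output layer emits raw logits, training needs a from_logits loss.

import tensorflow as tf
from model import create_veda_model  # assumes this file is on the import path

# Illustrative vocabulary size; the real value comes from the tokenizer
model = create_veda_model(vocab_size=8000, max_length=512, model_size="small")

# The model returns raw logits, so compile with a from_logits loss
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
)

# Build the weights with a dummy forward pass, then sample from a prompt
_ = model(tf.zeros((1, 16), dtype=tf.int32))
tokens = model.generate([1, 42, 7], max_new_tokens=20, temperature=0.8)
print(tokens)  # list of token ids, ending early if END (id 3) is sampled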