Upload 6 files
Browse files

- chat_interface.py +440 -0
- config.json +24 -0
- corex_tok.model +3 -0
- corex_tok.vocab +0 -0
- corex_tok_info.txt +9 -0
- final_model.pt +3 -0
chat_interface.py
ADDED
@@ -0,0 +1,440 @@
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
import argparse
import sys
import sentencepiece as spm
import math
from dataclasses import dataclass

# --- Define the CORRECT Model Architecture (copied from train_llm.py) ---
@dataclass
class ModelConfig:
    vocab_size: int = 32000
    hidden_size: int = 512
    num_layers: int = 8
    num_attention_heads: int = 8
    num_key_value_heads: int = 2
    intermediate_size: int = 1365
    max_position_embeddings: int = 2048
    rms_norm_eps: float = 1e-6
    rope_theta: float = 10000.0

class RMSNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        input_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(torch.float32)
        variance = hidden_states.pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
        return self.weight * hidden_states.to(input_dtype)

class RotaryEmbedding(nn.Module):
    def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
        super().__init__()
        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base
        inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)

    def forward(self, x, seq_len=None):
        if seq_len is None:
            seq_len = x.shape[-2]
        t = torch.arange(seq_len, device=x.device, dtype=self.inv_freq.dtype)
        freqs = torch.outer(t, self.inv_freq)
        emb = torch.cat((freqs, freqs), dim=-1)
        cos = emb.cos()
        sin = emb.sin()
        return cos, sin

def rotate_half(x):
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None):
    if position_ids is not None:
        cos = cos[position_ids].unsqueeze(1)
        sin = sin[position_ids].unsqueeze(1)
    else:
        cos = cos[:q.shape[-2]].unsqueeze(0).unsqueeze(0)
        sin = sin[:q.shape[-2]].unsqueeze(0).unsqueeze(0)

    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed

class SwiGLU(nn.Module):
    def __init__(self, hidden_size, intermediate_size):
        super().__init__()
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)

    def forward(self, x):
        gate = self.gate_proj(x)
        up = self.up_proj(x)
        return self.down_proj(F.silu(gate) * up)

class GroupedQueryAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.num_key_value_heads = config.num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.max_position_embeddings = config.max_position_embeddings

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)

        self.rotary_emb = RotaryEmbedding(
            self.head_dim,
            max_position_embeddings=self.max_position_embeddings,
            base=config.rope_theta,
        )

    def forward(self, hidden_states, attention_mask=None, position_ids=None):
        bsz, q_len, _ = hidden_states.size()

        query_states = self.q_proj(hidden_states)
        key_states = self.k_proj(hidden_states)
        value_states = self.v_proj(hidden_states)

        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

        cos, sin = self.rotary_emb(value_states, seq_len=q_len)
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)

        # Repeat k/v heads if n_kv_heads < n_heads
        key_states = torch.repeat_interleave(key_states, repeats=self.num_key_value_groups, dim=1)
        value_states = torch.repeat_interleave(value_states, repeats=self.num_key_value_groups, dim=1)

        attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)

        if attention_mask is not None:
            if attention_mask.dim() == 2:
                # [batch_size, seq_len] 0/1 padding mask -> additive [batch_size, 1, 1, seq_len]
                expanded_mask = attention_mask[:, None, None, :].to(attn_weights.dtype)
                attention_mask = (1.0 - expanded_mask) * torch.finfo(attn_weights.dtype).min
            # A 4D mask (e.g. the causal mask built in LLMModel.forward) is already additive
            attn_weights = attn_weights + attention_mask

        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
        attn_output = torch.matmul(attn_weights, value_states)

        attn_output = attn_output.transpose(1, 2).contiguous()
        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
        attn_output = self.o_proj(attn_output)

        return attn_output

class TransformerBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.self_attn = GroupedQueryAttention(config)
        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.mlp = SwiGLU(config.hidden_size, config.intermediate_size)

    def forward(self, hidden_states, attention_mask=None, position_ids=None):
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)
        hidden_states = self.self_attn(hidden_states, attention_mask, position_ids)
        hidden_states = residual + hidden_states

        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

        return hidden_states

class LLMModel(nn.Module):  # REPLACED CustomTransformer with this class
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
        self.layers = nn.ModuleList([TransformerBlock(config) for _ in range(config.num_layers)])
        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

    def forward(self, input_ids, attention_mask=None, position_ids=None):
        batch_size, seq_length = input_ids.shape

        if position_ids is None:
            position_ids = torch.arange(0, seq_length, dtype=torch.long, device=input_ids.device)
            position_ids = position_ids.unsqueeze(0).expand(batch_size, -1)

        # Build an additive causal mask (upper triangle masked), shape [1, 1, seq_len, seq_len]
        causal_mask = torch.full((seq_length, seq_length), torch.finfo(torch.float32).min, device=input_ids.device)
        causal_mask = torch.triu(causal_mask, diagonal=1)
        causal_mask = causal_mask[None, None, :, :]
        if attention_mask is not None:
            # Combine the causal mask with a [batch_size, seq_len] padding mask (1 = valid, 0 = pad)
            padding_mask = (1.0 - attention_mask[:, None, None, :].to(torch.float32)) * torch.finfo(torch.float32).min
            attention_mask = causal_mask + padding_mask
        else:
            attention_mask = causal_mask

        hidden_states = self.embed_tokens(input_ids)

        for layer in self.layers:
            hidden_states = layer(hidden_states, attention_mask, position_ids)

        hidden_states = self.norm(hidden_states)
        logits = self.lm_head(hidden_states)
        return logits

    @torch.no_grad()
    def generate(self, input_ids, max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.7):
        """Simplified generation loop (no KV cache; assumes batch size 1)."""
        self.eval()
        generated = input_ids.clone()

        for _ in range(max_new_tokens):
            # Get logits for the last position
            logits = self(generated)[:, -1, :]  # shape: [batch_size, vocab_size]

            if do_sample:
                # Apply temperature
                logits = logits / temperature
                probs = torch.softmax(logits, dim=-1)

                # Top-p (nucleus) sampling
                sorted_probs, sorted_indices = torch.sort(probs, descending=True)
                cumulative_probs = torch.cumsum(sorted_probs, dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the first token crossing the threshold is kept
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0
                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                probs = probs.masked_fill(indices_to_remove, 0.0)
                probs = probs / probs.sum(dim=-1, keepdim=True)

                next_token = torch.multinomial(probs, num_samples=1)
            else:
                next_token = torch.argmax(logits, dim=-1, keepdim=True)

            generated = torch.cat([generated, next_token], dim=-1)

            # Stop if EOS is generated (ID 3 in the CoreX tokenizer)
            if next_token.item() == 3:
                break

        return generated
# --- End of Model Architecture ---

def load_tokenizer(tokenizer_path):
    """Load the SentencePiece tokenizer and check that it has an <unk> token."""
    print(f"Debug: Attempting to load tokenizer from {tokenizer_path}")
    if not os.path.exists(tokenizer_path):
        print(f"Error: Tokenizer file {tokenizer_path} does not exist")
        return None
    try:
        sp = spm.SentencePieceProcessor()
        sp.load(tokenizer_path)
        # unk_id() returns -1 (not None) when the model has no <unk> piece
        if sp.unk_id() < 0:
            print("Warning: No <unk> token in tokenizer; unknown input pieces cannot be encoded.")
        print(f"Debug: Tokenizer loaded successfully. Vocab size: {sp.vocab_size()}")
        return sp
    except Exception as e:
        print(f"Error loading tokenizer: {e}")
        return None

def load_model(model_path, config_path, device='cpu'):
    """Load the model from checkpoint with detailed debugging."""
    print(f"Debug: Attempting to load model from {model_path}")
    print(f"Debug: Config path: {config_path}")

    if not os.path.exists(model_path):
        print(f"Error: Model file {model_path} does not exist")
        return None, None
    if not os.path.exists(config_path):
        print(f"Warning: Config file {config_path} not found. Using default config.")

    # Default parameters; overridden below by config.json where present
    config_dict = {
        'vocab_size': 32000,
        'hidden_size': 512,
        'num_layers': 8,
        'num_attention_heads': 8,
        'num_key_value_heads': 2,
        'intermediate_size': 1365,
        'max_position_embeddings': 2048,
        'rms_norm_eps': 1e-6,
        'rope_theta': 10000.0
    }
    try:
        if os.path.exists(config_path):
            with open(config_path, 'r') as f:
                loaded_config = json.load(f)
            # Update our config dict with the loaded values
            for key in config_dict:
                if key in loaded_config:
                    config_dict[key] = loaded_config[key]
            print(f"Debug: Config loaded: {config_dict}")
    except Exception as e:
        print(f"Warning: Failed to load config.json: {e}. Using default config.")

    # Create a ModelConfig object
    config = ModelConfig(**config_dict)

    try:
        print("Debug: Initializing LLMModel (correct architecture)")
        model = LLMModel(config)  # Now using the CORRECT model class
    except Exception as e:
        print(f"Error initializing model: {e}")
        return None, None

    try:
        checkpoint = torch.load(model_path, map_location=device)
        print(f"Debug: Checkpoint type: {type(checkpoint)}")
        if isinstance(checkpoint, dict):
            if 'model_state_dict' in checkpoint:
                print("Debug: Loading from full checkpoint dict")
                model.load_state_dict(checkpoint['model_state_dict'], strict=False)
            else:
                print("Debug: Loading state dictionary directly")
                model.load_state_dict(checkpoint, strict=False)
        else:
            print("Debug: Loading full model object (not recommended)")
            model = checkpoint
        model.to(device)
        model.eval()
        print(f"Debug: Model loaded successfully on {device}")
        return model, config
    except Exception as e:
        print(f"Error loading model checkpoint: {e}")
        return None, None

def preprocess_input(text, tokenizer, max_length=512):
    """Preprocess and tokenize input text, handling OOV tokens."""
    print(f"Debug: Preprocessing input: {text}")
    text = ' '.join(text.strip().split())
    if not text:
        return None, "Input is empty. Please provide a valid input."

    try:
        # add_bos/add_eos should match how the training data was tokenized
        tokens = tokenizer.encode(text, out_type=int, add_bos=True, add_eos=True)
        print(f"Debug: Tokenized input: {tokens}")
        if len(tokens) > max_length:
            # Truncate from the end, keeping BOS and re-appending EOS
            tokens = tokens[:max_length - 1] + [tokenizer.eos_id()]
        # For generation the input is not padded; the model's causal
        # attention mask handles the rest.
        return torch.tensor([tokens], dtype=torch.long), None
    except Exception as e:
        print(f"Tokenization error: {e}")
        return None, f"Failed to tokenize input: {text}. Please try again."

def generate_response(model, tokenizer, input_tokens, max_new_tokens=100, device='cpu'):
    """Generate a response from the model."""
    print(f"Debug: Generating response with input tokens shape: {input_tokens.shape}")
    try:
        input_tokens = input_tokens.to(device)
        output_tokens = model.generate(input_tokens, max_new_tokens=max_new_tokens)
        # Decode only the part of the sequence after the input
        full_sequence = output_tokens[0].tolist()
        input_length = input_tokens.shape[1]
        response_tokens = full_sequence[input_length:]
        response = tokenizer.decode(response_tokens)
        print(f"Debug: Generated response: {response}")
        return response, None
    except Exception as e:
        print(f"Inference error: {e}")
        return None, "Failed to generate response. Please try again."

def main():
    print("Initializing CoreX AI Chat Interface...")

    default_checkpoint_path = r"D:\checkpoints"
    default_tokenizer_path = r"D:\CoreX\tokenizer\corex_tok.model"

    parser = argparse.ArgumentParser(description="CoreX AI Chat Interface")
    parser.add_argument('--model_path', default=default_checkpoint_path, help="Path to model checkpoints")
    parser.add_argument('--tokenizer_path', default=default_tokenizer_path, help="Path to tokenizer")
    args = parser.parse_args()

    print("Default paths:")
    print(f"  Model: {args.model_path}")
    print(f"  Tokenizer: {args.tokenizer_path}")
    print("Using default paths...")

    print(f"Loading tokenizer from {args.tokenizer_path}...")
    tokenizer = load_tokenizer(args.tokenizer_path)
    if tokenizer is None:
        print("Failed to load tokenizer. Exiting.")
        return

    config_path = os.path.join(args.model_path, "config.json")
    model_path = os.path.join(args.model_path, "final_model.pt")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print(f"Loading custom model from {args.model_path}...")
    model, config = load_model(model_path, config_path, device)
    if model is None:
        print("Failed to load model. Exiting.")
        return

    print(f"Model loaded successfully on {device}")
    print("AI Chat Interface")
    print("=" * 50)
    print("Type 'quit', 'exit', or 'bye' to end the conversation")
    print("Type 'clear' to clear the conversation history")
    print("Type 'help' for more commands")
    print("=" * 50)

    conversation_history = []

    while True:
        user_input = input("\nYou: ").strip()

        if user_input.lower() in ['quit', 'exit', 'bye']:
            print("Goodbye!")
            break
        elif user_input.lower() == 'clear':
            conversation_history = []
            print("Conversation history cleared.")
            continue
        elif user_input.lower() == 'help':
            print("Available commands:")
            print("  quit/exit/bye: End the conversation")
            print("  clear: Clear conversation history")
            print("  help: Show this help message")
            continue

        input_tokens, error = preprocess_input(user_input, tokenizer)
        if error:
            print(f"AI: {error}")
            with open("rejected_inputs.log", "a") as log_file:
                log_file.write(f"Rejected input: {user_input}\nError: {error}\n")
            continue

        conversation_history.append({"role": "user", "content": user_input})

        response, error = generate_response(model, tokenizer, input_tokens, max_new_tokens=100, device=device)
        if error:
            print(f"AI: {error}")
            continue

        conversation_history.append({"role": "assistant", "content": response})
        print(f"\nAI: {response}")

if __name__ == "__main__":
    main()
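For reference, a minimal non-interactive smoke test of the functions above (a sketch, not part of the upload; the Windows paths are the defaults hard-coded in `main()`, adjust to your layout):

# Drive the same functions chat_interface.py uses, without the REPL loop.
from chat_interface import load_tokenizer, load_model, preprocess_input, generate_response

tokenizer = load_tokenizer(r"D:\CoreX\tokenizer\corex_tok.model")
model, config = load_model(r"D:\checkpoints\final_model.pt",
                           r"D:\checkpoints\config.json", device="cpu")

tokens, err = preprocess_input("Hello, CoreX!", tokenizer)
assert err is None, err
response, err = generate_response(model, tokenizer, tokens, max_new_tokens=32, device="cpu")
print(response)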
config.json
ADDED
@@ -0,0 +1,24 @@
{
  "vocab_size": 32000,
  "hidden_size": 512,
  "num_layers": 8,
  "num_attention_heads": 8,
  "num_key_value_heads": 2,
  "intermediate_size": 1365,
  "max_position_embeddings": 2048,
  "rms_norm_eps": 1e-06,
  "rope_theta": 10000.0,
  "learning_rate": 0.0005,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "gradient_clip_val": 1.0,
  "warmup_steps": 1000,
  "max_steps": 50000,
  "batch_size": 2,
  "gradient_accumulation_steps": 16,
  "eval_interval": 500,
  "save_interval": 2500,
  "max_length": 512,
  "dataloader_workers": 0
}
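As a sanity check (a back-of-the-envelope sketch, not part of the upload), the architecture fields above imply roughly 54.8M parameters, consistent with the 219,199,878-byte final_model.pt at 4 bytes per float32 weight:

# Parameter count implied by config.json (untied embeddings, bias-free linears)
vocab, h, layers, inter, kv_heads, heads = 32000, 512, 8, 1365, 2, 8
head_dim = h // heads                                   # 64
attn = h * h + 2 * (h * kv_heads * head_dim) + h * h    # q_proj, k_proj, v_proj, o_proj
mlp = 3 * h * inter                                     # gate, up, down projections
norms = 2 * h                                           # two RMSNorms per block
per_layer = attn + mlp + norms
total = 2 * vocab * h + layers * per_layer + h          # embed + lm_head + blocks + final norm
print(total, total * 4)  # 54,792,704 params -> ~219.2 MB in float32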
corex_tok.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c52665e3c6c707d134e023a5e250749ff1cfd4098d472c15762f856059d4d026
size 811770
corex_tok.vocab
ADDED
The diff for this file is too large to render.
corex_tok_info.txt
ADDED
@@ -0,0 +1,9 @@
CoreX Tokenizer Information
==========================
Vocabulary Size: 32000
Model Type: unigram
Special Tokens:
  PAD: 0 -> '<pad>'
  UNK: 1 -> '<unk>'
  BOS: 2 -> '<s>'
  EOS: 3 -> '</s>'
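These IDs are baked into the SentencePiece model itself, so they can be verified directly (a small sketch; assumes corex_tok.model is in the working directory):

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("corex_tok.model")
print(sp.pad_id(), sp.unk_id(), sp.bos_id(), sp.eos_id())  # expect 0 1 2 3 per the table above
print(sp.vocab_size())  # 32000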
final_model.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:894e57889be1582aa40f40e3c049829e5dcf35f9082dbba739dd0f30a21d231d
size 219199878
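Both corex_tok.model and final_model.pt are stored as Git LFS pointers; the oid is the SHA-256 of the actual file contents, so a fetched copy can be verified like this (a sketch):

import hashlib

def file_sha256(path, chunk=1 << 20):
    # Stream the file so large checkpoints don't need to fit in memory
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

# Should match the oid in the pointer above for a correctly fetched file
print(file_sha256("final_model.pt") == "894e57889be1582aa40f40e3c049829e5dcf35f9082dbba739dd0f30a21d231d")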