File size: 4,583 Bytes
f62675d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import torch
import gguf
import numpy as np
import os
import sys
import pickle

# Character tokenizer class for loading the checkpoint
class CharacterTokenizer:
    """Character-level tokenizer mapping single characters to integer ids.

    Ids 0 and 1 are reserved for the ``'<PAD>'`` and ``'<UNK>'`` entries;
    characters learned by :meth:`fit` receive ids starting at 2, assigned in
    sorted order.

    NOTE(review): attribute names are kept exactly as they were; presumably
    pickled instances of this class live inside the checkpoint and rely on
    them -- confirm before renaming anything.
    """

    def __init__(self):
        self.char_to_idx = {}
        self.idx_to_char = {}
        self.vocab_size = 0
        self.pad_token_id = 0
        self.unk_token_id = 1

    def fit(self, texts):
        """Build the vocabulary from an iterable of texts.

        Each item is converted with ``str()`` and split into characters.
        The mapping is rebuilt from scratch on every call, so fitting twice
        never leaves stale characters or colliding ids behind.
        """
        chars = set()
        for text in texts:
            chars.update(str(text))

        # Start from a fresh dict: updating the old one in place would keep
        # characters from a previous fit() whose ids clash with the new ones.
        char_to_idx = {'<PAD>': 0, '<UNK>': 1}
        char_to_idx.update(
            (char, i) for i, char in enumerate(sorted(chars), start=2)
        )

        self.char_to_idx = char_to_idx
        self.idx_to_char = {idx: char for char, idx in char_to_idx.items()}
        self.vocab_size = len(char_to_idx)

    def encode(self, text, max_length=None, padding=False, truncation=False, return_tensors=None):
        """Encode one string or a sequence of strings into token ids.

        Args:
            text: A single string or an iterable of items; each item is
                converted with ``str()`` before encoding.
            max_length: Target length used by ``truncation`` and ``padding``;
                both are no-ops when it is ``None`` or 0.
            padding: Right-pad each row with ``pad_token_id`` up to
                ``max_length``.
            truncation: Cut each row down to ``max_length`` ids.
            return_tensors: ``'pt'`` returns a ``torch.long`` tensor; any
                other value returns a list of lists.

        Returns:
            Always a batch (one row per input text), even for a single string.
            Unknown characters map to ``unk_token_id``.
        """
        if isinstance(text, str):
            text = [text]

        encoded = []
        for t in text:
            tokens = [self.char_to_idx.get(c, self.unk_token_id) for c in str(t)]
            if truncation and max_length:
                tokens = tokens[:max_length]
            if padding and max_length:
                # A negative repeat count yields [], so rows that are already
                # longer than max_length are left untouched.
                tokens = tokens + [self.pad_token_id] * (max_length - len(tokens))
            encoded.append(tokens)

        if return_tensors == 'pt':
            return torch.tensor(encoded, dtype=torch.long)
        return encoded

    def decode(self, token_ids):
        """Turn token ids back into text.

        Args:
            token_ids: A flat sequence of ids, a batch (sequence of
                sequences), or a ``torch.Tensor`` of either shape.

        Returns:
            A string for a flat sequence, or a list of strings (one per row)
            for a batch such as the output of :meth:`encode`. Ids missing
            from the vocabulary are rendered as ``'<UNK>'``.
        """
        if isinstance(token_ids, torch.Tensor):
            token_ids = token_ids.tolist()

        # encode() always returns a batch; decode it row by row instead of
        # failing on an unhashable list used as a dict key.
        if (
            isinstance(token_ids, (list, tuple))
            and token_ids
            and isinstance(token_ids[0], (list, tuple))
        ):
            return [self.decode(row) for row in token_ids]

        return ''.join(self.idx_to_char.get(idx, '<UNK>') for idx in token_ids)

def convert_sage_to_gguf(model_path, output_path, *, context_length=64,
                         embedding_length=256, block_count=4,
                         feed_forward_length=1024, head_count=8):
    """Convert a Sage PyTorch checkpoint into a GGUF file.

    The checkpoint must be a dict containing ``'model_state_dict'`` and
    ``'model_config'`` (with a ``'vocab_size'`` entry). Every tensor in the
    state dict is written as float32; names found in the mapping below are
    renamed, all others are written under their original name.

    Args:
        model_path: Path of the checkpoint readable by ``torch.load``.
        output_path: Destination path of the GGUF file.
        context_length: Value stored as the GGUF context length.
        embedding_length: Value stored as the GGUF embedding length.
        block_count: Number of transformer layers; also controls how many
            ``transformer_encoder.layers.{i}`` prefixes are renamed.
        feed_forward_length: Value stored as the GGUF feed-forward length.
        head_count: Value stored for both the head count and the KV head
            count.

    Raises:
        KeyError: If the checkpoint lacks ``'model_state_dict'`` or
            ``'model_config'['vocab_size']``.
    """
    # SECURITY: weights_only=False lets torch.load unpickle arbitrary Python
    # objects, which can execute code. Only convert checkpoints you trust.
    checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
    state_dict = checkpoint['model_state_dict']
    # Read the config before creating the writer so a malformed checkpoint
    # fails before any output is touched.
    vocab_size = checkpoint['model_config']['vocab_size']

    # Map Sage's tensor names to GGUF format
    tensor_map = {
        'embedding.weight': 'token_embd.weight',
        'pos_embedding.weight': 'position_embd.weight',
        'output_layer.weight': 'output.weight',
        'output_layer.bias': 'output.bias',
    }
    for i in range(block_count):
        p = f'transformer_encoder.layers.{i}'
        # NOTE(review): in_proj_* is presumably the packed Q/K/V projection of
        # nn.MultiheadAttention, yet it is stored under the attn_q name; and
        # linear1 is stored as ffn_gate although no gating is visible here.
        # Names are left unchanged because an external loader may depend on
        # them -- confirm before renaming.
        tensor_map.update({
            f'{p}.self_attn.in_proj_weight': f'blk.{i}.attn_q.weight',
            f'{p}.self_attn.in_proj_bias': f'blk.{i}.attn_q.bias',
            f'{p}.self_attn.out_proj.weight': f'blk.{i}.attn_output.weight',
            f'{p}.self_attn.out_proj.bias': f'blk.{i}.attn_output.bias',
            f'{p}.linear1.weight': f'blk.{i}.ffn_gate.weight',
            f'{p}.linear1.bias': f'blk.{i}.ffn_gate.bias',
            f'{p}.linear2.weight': f'blk.{i}.ffn_down.weight',
            f'{p}.linear2.bias': f'blk.{i}.ffn_down.bias',
            f'{p}.norm1.weight': f'blk.{i}.attn_norm.weight',
            f'{p}.norm1.bias': f'blk.{i}.attn_norm.bias',
            f'{p}.norm2.weight': f'blk.{i}.ffn_norm.weight',
            f'{p}.norm2.bias': f'blk.{i}.ffn_norm.bias',
        })

    gguf_writer = gguf.GGUFWriter(output_path, "transformer_lm")
    try:
        # Add metadata
        gguf_writer.add_context_length(context_length)
        gguf_writer.add_embedding_length(embedding_length)
        gguf_writer.add_block_count(block_count)
        gguf_writer.add_feed_forward_length(feed_forward_length)
        gguf_writer.add_head_count(head_count)
        gguf_writer.add_head_count_kv(head_count)
        gguf_writer.add_vocab_size(vocab_size)
        # NOTE(review): the norm layers carry biases, which suggests plain
        # LayerNorm rather than RMSNorm; the RMS epsilon key may be the wrong
        # one -- confirm against the loader.
        gguf_writer.add_layer_norm_rms_eps(1e-5)
        gguf_writer.add_name("Sage")
        gguf_writer.add_license("MIT")

        # Write tensors
        for orig_name, tensor in state_dict.items():
            mapped_name = tensor_map.get(orig_name, orig_name)
            # Cast inside torch first: .numpy() cannot handle bfloat16 or
            # tensors that still track gradients.
            arr = tensor.detach().to(torch.float32).numpy()
            gguf_writer.add_tensor(mapped_name, arr)

        gguf_writer.write_header_to_file()
        gguf_writer.write_kv_data_to_file()
        gguf_writer.write_tensors_to_file()
    finally:
        # Always release the writer, even when a step above fails.
        gguf_writer.close()

    print(f"GGUF file created: {output_path}")
    print(f"Total tensors written: {len(state_dict)}")
    print("NOTE: This GGUF file uses a custom architecture 'transformer_lm'")
    print("      and will NOT load in standard llama.cpp/llama-cpp-python")
    print("      without adding custom architecture support.")

# Script entry: convert the checkpoint that sits next to this file, if any.
script_dir = os.path.dirname(os.path.abspath(__file__))
pytorch_bin = os.path.join(script_dir, "pytorch_model.bin")
if not os.path.exists(pytorch_bin):
    print(f"Model file {pytorch_bin} not found")
else:
    # The output name is relative, so the GGUF file is created in the current
    # working directory rather than next to the script.
    # NOTE(review): the name says "f16" but the converter writes float32
    # tensors -- confirm which one is intended.
    convert_sage_to_gguf(pytorch_bin, "sage-f16.gguf")