itriedcoding commited on
Commit
64728f0
·
verified ·
1 Parent(s): 66d4b44

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.bin filter=lfs diff=lfs merge=lfs -text
2
+ custom_llm_model.pth filter=lfs diff=lfs merge=lfs -text
3
+ hf_model/tokenizer.pkl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
# Load model and tokenizer from Hugging Face Hub
model_name = "itriedcoding/Sage"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# The model uses a fixed-size table of learned position embeddings, so the
# prompt plus the generated text can never be longer than this many tokens.
MAX_CONTEXT = int(getattr(model.config, "max_position_embeddings", 64))


def generate_text(prompt, max_length, temperature):
    """Sample a continuation of ``prompt`` from the Sage model.

    Args:
        prompt: Text to continue. An empty prompt yields an empty string
            because there is nothing to condition the model on.
        max_length: Requested total length (prompt + new tokens). It is
            clamped to ``MAX_CONTEXT``; sliders deliver floats, hence ``int``.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded text (prompt included) with special tokens removed.
    """
    if not prompt:
        return ""

    inputs = tokenizer.encode(prompt, return_tensors="pt")
    # Keep only the most recent tokens and leave room for at least one new one.
    inputs = inputs[:, -(MAX_CONTEXT - 1):]

    total_length = min(int(max_length), MAX_CONTEXT)
    # ``max_length`` counts the prompt too; it must exceed the prompt length.
    total_length = max(total_length, inputs.shape[1] + 1)

    pad_token_id = tokenizer.eos_token_id
    if pad_token_id is None:
        pad_token_id = model.config.pad_token_id

    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_length=total_length,
            temperature=float(temperature),
            do_sample=True,
            pad_token_id=pad_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", value="Hello", placeholder="Enter your prompt here"),
        # The upper bound follows the model's context size instead of a fixed 100.
        gr.Slider(minimum=10, maximum=MAX_CONTEXT, value=30, step=1, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.8, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="🤖 Sage Text Generator",
    description="Generate text using the Sage custom character-level language model.",
    examples=[
        ["Hello", 30, 0.8],
        ["The weather", 30, 0.8],
        ["Deep learning", 30, 0.8],
    ],
)

if __name__ == "__main__":
    demo.launch()
custom_llm_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:accd9d82bd55ee686643f9e889f53e3d9938197f30fea126df1b596090c70382
3
+ size 12805265
custom_llm_project/README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Custom LLM Model
2
+
3
+ This is a custom-built language model trained on a small dataset of example sentences about AI and machine learning.
4
+
5
+ ## Model Description
6
+
7
+ - **Model Type**: Transformer-based language model
8
+ - **Vocabulary Size**: 40 tokens (38 distinct characters plus the `<PAD>` and `<UNK>` special tokens)
9
+ - **Hidden Size**: 256
10
+ - **Number of Layers**: 4
11
+ - **Number of Attention Heads**: 8
12
+ - **Feedforward Size**: 1024
13
+ - **Max Sequence Length**: 64
14
+ - **Training Epochs**: 10
15
+ - **Parameters**: ~3.2M
16
+
17
+ ## Training Data
18
+
19
+ The model was trained on a small custom dataset containing 10 example sentences about:
20
+ - Greetings and small talk
21
+ - Weather descriptions
22
+ - Machine learning concepts
23
+ - Deep learning and transformers
24
+ - Natural language processing
25
+ - Model publishing and sharing
26
+
27
+ ## Usage
28
+
29
+ ```python
30
+ import torch
31
+ from train_model import TransformerLM, CharacterTokenizer
32
+
33
+ # Load the saved model
34
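+ checkpoint = torch.load('custom_llm_model.pth', map_location='cpu', weights_only=False)  # the checkpoint stores a pickled tokenizer object, so only load files you trust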
35
+ model_config = checkpoint['model_config']
36
+ tokenizer = checkpoint['tokenizer']
37
+
38
+ # Initialize model
39
+ model = TransformerLM(**model_config)
40
+ model.load_state_dict(checkpoint['model_state_dict'])
41
+ model.eval()
42
+
43
+ # Generate text
44
+ def generate_text(prompt, max_length=50, temperature=0.8):
45
+ # Tokenize prompt
46
+ input_ids = tokenizer.encode(prompt, max_length=32, padding=False, return_tensors='pt')
47
+ generated = input_ids.clone()
48
+
49
+ with torch.no_grad():
50
+ for _ in range(max_length):
51
+ logits = model(generated)
52
+ next_token_logits = logits[0, -1, :] / temperature
53
+ probs = torch.softmax(next_token_logits, dim=-1)
54
+ next_token = torch.multinomial(probs, num_samples=1)
55
+ generated = torch.cat([generated, next_token.unsqueeze(0)], dim=1)
56
+
57
+ # Stop on period or max length
58
+ if next_token.item() == tokenizer.char_to_idx.get('.', tokenizer.unk_token_id):
59
+ break
60
+
61
+ return tokenizer.decode(generated[0])
62
+
63
+ # Example usage
64
+ print(generate_text("Hello"))
65
+ print(generate_text("The weather"))
66
+ print(generate_text("Deep learning"))
67
+ ```
68
+
69
+ ## Limitations
70
+
71
+ This is a small demonstration model trained on very limited data. For serious applications, consider:
72
+ - Using larger datasets
73
+ - Training for more epochs
74
+ - Using larger model architectures
75
+ - Implementing proper tokenization (BPE, WordPiece, etc.)
76
+
77
+ ## License
78
+
79
+ This model is released under the MIT License.
custom_llm_project/hf_model/README.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Custom LLM Model
2
+
3
+ A small custom-built transformer language model trained on example sentences about AI and machine learning.
4
+
5
+ ## Model Description
6
+
7
+ This is a demonstration model built to showcase how to create and publish a custom AI model to Hugging Face. The model is a transformer-based language model with:
8
+
9
+ - **Architecture**: Transformer decoder
10
+ - **Vocabulary Size**: 40 characters
11
+ - **Hidden Size**: 256
12
+ - **Number of Layers**: 4
13
+ - **Number of Attention Heads**: 8
14
+ - **Feedforward Size**: 1024
15
+ - **Max Sequence Length**: 64
16
+ - **Parameters**: ~3.2M
17
+
18
+ ## Training Data
19
+
20
+ The model was trained on a small custom dataset containing 10 example sentences about:
21
+ - Greetings and small talk
22
+ - Weather descriptions
23
+ - Machine learning concepts
24
+ - Deep learning and transformers
25
+ - Natural language processing
26
+ - Model publishing and sharing
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ from transformers import AutoTokenizer, AutoModelForCausalLM
32
+ import torch
33
+
34
+ # Load model and tokenizer
35
+ model_name = "your-username/custom-llm-model" # Replace with your HF username
36
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
37
+ model = AutoModelForCausalLM.from_pretrained(model_name)
38
+
39
+ # Generate text
40
+ def generate_text(prompt, max_length=50, temperature=0.8):
41
+ inputs = tokenizer.encode(prompt, return_tensors="pt")
42
+
43
+ with torch.no_grad():
44
+ outputs = model.generate(
45
+ inputs,
46
+ max_length=max_length,
47
+ temperature=temperature,
48
+ do_sample=True,
49
+ pad_token_id=tokenizer.eos_token_id
50
+ )
51
+
52
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
53
+
54
+ # Example usage
55
+ print(generate_text("Hello"))
56
+ print(generate_text("The weather"))
57
+ print(generate_text("Deep learning"))
58
+ ```
59
+
60
+ ## Limitations
61
+
62
+ This is a small demonstration model trained on very limited data. For serious applications, consider:
63
+ - Using larger datasets
64
+ - Training for more epochs
65
+ - Using larger model architectures
66
+ - Implementing proper tokenization (BPE, WordPiece, etc.)
67
+
68
+ ## License
69
+
70
+ This model is released under the MIT License.
71
+
72
+ ## Citation
73
+
74
+ ```
75
+ @misc{custom_llm_model,
76
+ author = {Your Name},
77
+ title = {Custom LLM Model},
78
+ year = {2026},
79
+ publisher = {Hugging Face},
80
+ journal = {Hugging Face Model Hub},
81
+ doi = {10.57967/hf/0000}
82
+ }
83
+ ```
custom_llm_project/hf_model/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .modeling_transformer_lm import TransformerLM, TransformerLMConfig
2
+
3
+ __all__ = ["TransformerLM", "TransformerLMConfig"]
custom_llm_project/hf_model/config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": ["TransformerLM"],
3
+ "model_type": "transformer_lm",
4
+ "vocab_size": 40,
5
+ "hidden_size": 256,
6
+ "num_hidden_layers": 4,
7
+ "num_attention_heads": 8,
8
+ "intermediate_size": 1024,
9
+ "max_position_embeddings": 64,
10
+ "pad_token_id": 0,
11
+ "bos_token_id": 1,
12
+ "eos_token_id": 2,
13
+ "torch_dtype": "float32"
14
+ }
custom_llm_project/hf_model/modeling_transformer_lm.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import math
4
+ from transformers import PreTrainedModel
5
+ from transformers.modeling_utils import PretrainedConfig
6
+
7
class TransformerLMConfig(PretrainedConfig):
    """Configuration for ``TransformerLM``.

    Stores the architecture sizes that the model constructor reads and
    forwards the special-token ids, together with any extra keyword
    arguments, to ``PretrainedConfig``.
    """

    model_type = "transformer_lm"

    def __init__(
        self,
        vocab_size=40,
        hidden_size=256,
        num_hidden_layers=4,
        num_attention_heads=8,
        intermediate_size=1024,
        max_position_embeddings=64,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        **kwargs
    ):
        # NOTE(review): confirm that ids 1 and 2 really are BOS/EOS in the
        # tokenizer used with this model before relying on these defaults.
        special_tokens = {
            "pad_token_id": pad_token_id,
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
        }
        super().__init__(**special_tokens, **kwargs)

        # Architecture hyper-parameters, stored under the same attribute names
        # as the constructor arguments.
        architecture = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("intermediate_size", intermediate_size),
            ("max_position_embeddings", max_position_embeddings),
        )
        for attr_name, attr_value in architecture:
            setattr(self, attr_name, attr_value)
36
+
37
class TransformerLM(PreTrainedModel):
    """Character-level causal language model built on ``nn.TransformerEncoder``.

    Token embeddings and learned position embeddings are summed, passed
    through a stack of encoder layers under a causal attention mask, and
    projected back to vocabulary logits.

    NOTE(review): earlier revisions applied no causal mask. Checkpoints
    trained that way could attend to future tokens and should be retrained.
    """

    config_class = TransformerLMConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config

        # Attribute names are part of the checkpoint format (state-dict keys).
        self.embedding = nn.Embedding(config.vocab_size, config.hidden_size)
        self.pos_embedding = nn.Embedding(config.max_position_embeddings, config.hidden_size)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_size,
            nhead=config.num_attention_heads,
            dim_feedforward=config.intermediate_size,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_hidden_layers)
        self.output_layer = nn.Linear(config.hidden_size, config.vocab_size)

        self.max_position_embeddings = config.max_position_embeddings

    def forward(self, input_ids, attention_mask=None, labels=None, position_ids=None, **kwargs):
        """Compute next-token logits and, optionally, the language-modeling loss.

        Args:
            input_ids: Token ids of shape ``(batch, seq_len)``.
            attention_mask: Optional ``(batch, seq_len)`` mask, non-zero for
                real tokens and zero for padding.
            labels: Optional target ids with the same shape as ``input_ids``.
                They are shifted internally so position ``t`` predicts
                ``t + 1``; ``-100`` entries are ignored by the loss.
            position_ids: Optional explicit positions; defaults to
                ``0 .. seq_len - 1``.
            **kwargs: Extra arguments supplied by ``generate()`` (for example
                cache flags). This model has no cache, so they are ignored.

        Returns:
            ``CausalLMOutputWithPast`` with ``logits`` and ``loss`` (``None``
            when no labels are given). Prefer attribute access
            (``out.logits``); fields that are ``None`` are not available as
            dictionary keys.

        Raises:
            ValueError: If ``seq_len`` exceeds ``max_position_embeddings``.
        """
        # Imported here so the fix does not depend on the module's import block.
        from transformers.modeling_outputs import CausalLMOutputWithPast

        seq_len = input_ids.size(1)
        if seq_len > self.max_position_embeddings:
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_position_embeddings "
                f"({self.max_position_embeddings})"
            )
        if position_ids is None:
            position_ids = torch.arange(0, seq_len, device=input_ids.device).unsqueeze(0)

        # Embedding + positional encoding
        hidden = self.embedding(input_ids) * math.sqrt(self.config.hidden_size)
        hidden = hidden + self.pos_embedding(position_ids)

        # Boolean mask, True = "may not attend". Start with the causal part:
        # position t must not see positions after t.
        blocked = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=input_ids.device),
            diagonal=1,
        )
        if attention_mask is not None:
            padded_keys = ~attention_mask.bool()  # (batch, seq_len)
            blocked = blocked.unsqueeze(0) | padded_keys.unsqueeze(1)  # (batch, seq, seq)
            # Always allow self-attention so that no row is fully masked;
            # a fully masked row would turn the softmax into NaNs.
            self_attention = torch.eye(seq_len, dtype=torch.bool, device=input_ids.device)
            blocked = blocked & ~self_attention
            # nn.MultiheadAttention expects one mask per (batch, head) pair.
            blocked = blocked.repeat_interleave(self.config.num_attention_heads, dim=0)

        # Transformer encoder
        output = self.transformer_encoder(hidden, mask=blocked)

        # Output projection
        logits = self.output_layer(output)

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return CausalLMOutputWithPast(loss=loss, logits=logits)

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        """Build the ``forward`` inputs for one ``generate()`` step.

        The model keeps no key/value cache, so the whole sequence is fed on
        every step. Only the most recent ``max_position_embeddings`` tokens
        are kept, because the position table cannot address more.
        """
        window = self.max_position_embeddings
        input_ids = input_ids[:, -window:]

        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            attention_mask = attention_mask[:, -window:]

        position_ids = kwargs.get("position_ids", None)
        if position_ids is not None and position_ids.shape[-1] != input_ids.shape[-1]:
            # Stale positions from an earlier step; forward() recomputes them.
            position_ids = None

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "position_ids": position_ids,
        }
custom_llm_project/sage_model/README.md ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Sage - Custom LLM Model
2
+
3
+ Sage is a custom-built transformer language model designed for text generation tasks. This model demonstrates the full lifecycle of building and publishing a custom AI model to Hugging Face.
4
+
5
+ ## Model Overview
6
+
7
+ - **Model Type**: Transformer-based language model
8
+ - **Architecture**: Decoder-only transformer
9
+ - **Vocabulary Size**: 40 characters
10
+ - **Hidden Size**: 256
11
+ - **Number of Layers**: 4
12
+ - **Number of Attention Heads**: 8
13
+ - **Feedforward Size**: 1024
14
+ - **Max Sequence Length**: 64
15
+ - **Parameters**: 3,195,944 (~3.2M)
16
+ - **Training Framework**: PyTorch
17
+ - **License**: MIT
18
+
19
+ ## Training Data
20
+
21
+ Sage was trained on a curated dataset of example sentences covering:
22
+ - Conversational phrases and greetings
23
+ - Weather and environmental descriptions
24
+ - Machine learning and AI concepts
25
+ - Deep learning architectures (transformers, neural networks)
26
+ - Natural language processing applications
27
+ - Model development and deployment practices
28
+
29
+ The dataset consists of 10 examples designed to teach the model patterns in technical and conversational English.
30
+
31
+ ## Technical Specifications
32
+
33
+ ### Model Architecture
34
+ ```
35
+ TransformerLM(
36
+ (embedding): Embedding(40, 256)
37
+ (pos_embedding): Embedding(64, 256)
38
+ (transformer_encoder): TransformerEncoder(
39
+ (layers): ModuleList(
40
+ (0-3): TransformerEncoderLayer(
41
+ (self_attn): MultiheadAttention(embed_dim=256, num_heads=8)
42
+ (linear1): Linear(256, 1024)
43
+ (linear2): Linear(1024, 256)
44
+ (norm1): LayerNorm(256)
45
+ (norm2): LayerNorm(256)
46
+ (dropout): Dropout(p=0.1)
47
+ )
48
+ )
49
+ )
50
+ (output_layer): Linear(256, 40)
51
+ )
52
+ ```
53
+
54
+ ### Tokenization
55
+ Sage uses a character-level tokenizer with:
56
+ - Vocabulary: 40 unique characters including special tokens
57
+ - Special tokens: `<PAD>` (0), `<UNK>` (1)
58
+ - Encoding: each Unicode character maps to one token id; characters outside the vocabulary map to `<UNK>`
59
+ - Maximum sequence length: 64 tokens
60
+
61
+ ## Usage
62
+
63
+ ### With Transformers Library
64
+ ```python
65
+ from transformers import AutoTokenizer, AutoModelForCausalLM
66
+ import torch
67
+
68
+ model_name = "itriedcoding/Sage"
69
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
70
+ model = AutoModelForCausalLM.from_pretrained(model_name)
71
+
72
+ def generate_text(prompt, max_length=50, temperature=0.8):
73
+ inputs = tokenizer.encode(prompt, return_tensors="pt")
74
+ with torch.no_grad():
75
+ outputs = model.generate(
76
+ inputs,
77
+ max_length=max_length,
78
+ temperature=temperature,
79
+ do_sample=True,
80
+ pad_token_id=tokenizer.eos_token_id
81
+ )
82
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
83
+
84
+ print(generate_text("Hello"))
85
+ print(generate_text("Deep learning"))
86
+ ```
87
+
88
+ ### Direct PyTorch Usage
89
+ ```python
90
+ import torch
91
+ from modeling_transformer_lm import TransformerLM
92
+
93
+ model = TransformerLM.from_pretrained("itriedcoding/Sage")
94
+ ```
95
+
96
+ ## Model Card Metadata
97
+
98
+ ```
99
+ library_name: transformers
100
+ license: MIT
101
+ base_model: custom-built
102
+ tags:
103
+ - text-generation
104
+ - transformer
105
+ - character-level
106
+ - custom-model
107
+ - educational
108
+ pipeline_tag: text-generation
109
+ ```
110
+
111
+ ## Hugging Face Spaces Deployment
112
+
113
+ You can run Sage in the dedicated Hugging Face Space:
114
+ https://huggingface.co/spaces/itriedcoding/sage-space
115
+
116
+ ### Gradio Space
117
+ The Space at `itriedcoding/sage-space` provides a Gradio interface for text generation.
118
+ Create a new Space with `app.py`:
119
+ ```python
120
+ import gradio as gr
121
+ from transformers import AutoTokenizer, AutoModelForCausalLM
122
+ import torch
123
+
124
+ model_name = "itriedcoding/Sage"
125
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
126
+ model = AutoModelForCausalLM.from_pretrained(model_name)
127
+
128
+ def generate_text(prompt, max_length, temperature):
129
+ inputs = tokenizer.encode(prompt, return_tensors="pt")
130
+ with torch.no_grad():
131
+ outputs = model.generate(
132
+ inputs,
133
+ max_length=int(max_length),
134
+ temperature=temperature,
135
+ do_sample=True,
136
+ pad_token_id=tokenizer.eos_token_id
137
+ )
138
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
139
+
140
+ demo = gr.Interface(
141
+ fn=generate_text,
142
+ inputs=[
143
+ gr.Textbox(label="Prompt", value="Hello"),
144
+ gr.Slider(minimum=10, maximum=100, value=30, label="Max Length"),
145
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.8, label="Temperature")
146
+ ],
147
+ outputs=gr.Textbox(label="Generated Text"),
148
+ title="Sage Text Generator",
149
+ description="Custom character-level language model"
150
+ )
151
+
152
+ if __name__ == "__main__":
153
+ demo.launch()
154
+ ```
155
+
156
+ ## GGUF Quantization
157
+
158
+ WARNING: Sage uses a custom transformer architecture (not Llama-family). Standard GGUF conversion tools (llama.cpp) do not support this architecture. To generate GGUF files, you must write a custom conversion script that maps Sage's layers to GGUF format. A conversion script template is provided below.
159
+
160
+ ### GGUF Conversion Script Template
161
+ ```python
162
+ import torch
163
+ import struct
164
+
165
+ def convert_to_gguf(model_path, output_path):
166
+ checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
167
+ state_dict = checkpoint['model_state_dict']
168
+ # Custom conversion logic to write GGUF format
169
+ # Standard GGUF tools like llama.cpp's convert.py will NOT work
170
+ # You need to implement the GGUF tensor serialization manually
171
+ print(f"Converting {len(state_dict)} tensors to GGUF format...")
172
+ # Implementation depends on the target runtime
173
+
174
+ convert_to_gguf("pytorch_model.bin", "sage.gguf")
175
+ ```
176
+
177
+ ### Recommended Alternative Formats
178
+ - **PyTorch**: Full precision (pytorch_model.bin) - already provided
179
+ - **TorchScript**: torch.jit.trace for optimized CPU/GPU inference
180
+ - **ONNX**: Use torch.onnx.export for cross-platform deployment
181
+
182
+ ## Performance & Limitations
183
+
184
+ ### Intended Use
185
+ - Educational demonstrations of transformer architectures
186
+ - Character-level language modeling experiments
187
+ - Prototyping and testing custom model pipelines
188
+ - Learning about model deployment on Hugging Face
189
+
190
+ ### Limitations
191
+ - Character-level tokenization limits coherence
192
+ - Small training dataset (10 examples)
193
+ - Small model size (3.2M parameters)
194
+ - Not suitable for production NLP applications
195
+ - Best for short text generation (<50 tokens)
196
+
197
+ ## Citation
198
+
199
+ ```bibtex
200
+ @misc{sage_model_2026,
201
+ author = {itriedcoding},
202
+ title = {Sage: Custom Character-Level Language Model},
203
+ year = {2026},
204
+ publisher = {Hugging Face},
205
+ journal = {Hugging Face Model Hub},
206
+ url = {https://huggingface.co/itriedcoding/Sage}
207
+ }
208
+ ```
209
+
210
+ ## Training Reproducibility
211
+
212
+ To reproduce this model:
213
+ 1. Clone the repository
214
+ 2. Install requirements: `pip install torch pandas`
215
+ 3. Run training from the repository root (the script reads `data/dataset.csv`): `python custom_llm_project/train_model.py`
216
+ 4. The trained checkpoint is saved as a PyTorch .pth file
217
+
218
+ ## Contact
219
+
220
+ - Hugging Face: https://huggingface.co/itriedcoding
221
+ - Model Space: https://huggingface.co/spaces/itriedcoding/sage-space
222
+ - Issues: Use the "Issues" tab on this model page
custom_llm_project/sage_model/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .modeling_transformer_lm import TransformerLM, TransformerLMConfig
2
+
3
+ __all__ = ["TransformerLM", "TransformerLMConfig"]
custom_llm_project/sage_model/config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": ["TransformerLM"],
3
+ "model_type": "transformer_lm",
4
+ "vocab_size": 40,
5
+ "hidden_size": 256,
6
+ "num_hidden_layers": 4,
7
+ "num_attention_heads": 8,
8
+ "intermediate_size": 1024,
9
+ "max_position_embeddings": 64,
10
+ "pad_token_id": 0,
11
+ "bos_token_id": 1,
12
+ "eos_token_id": 2,
13
+ "torch_dtype": "float32"
14
+ }
custom_llm_project/sage_model/modeling_transformer_lm.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import math
4
+ from transformers import PreTrainedModel
5
+ from transformers.modeling_utils import PretrainedConfig
6
+
7
class TransformerLMConfig(PretrainedConfig):
    """Configuration for ``TransformerLM``.

    Stores the architecture sizes that the model constructor reads and
    forwards the special-token ids, together with any extra keyword
    arguments, to ``PretrainedConfig``.
    """

    model_type = "transformer_lm"

    def __init__(
        self,
        vocab_size=40,
        hidden_size=256,
        num_hidden_layers=4,
        num_attention_heads=8,
        intermediate_size=1024,
        max_position_embeddings=64,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        **kwargs
    ):
        # NOTE(review): confirm that ids 1 and 2 really are BOS/EOS in the
        # tokenizer used with this model before relying on these defaults.
        special_tokens = {
            "pad_token_id": pad_token_id,
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
        }
        super().__init__(**special_tokens, **kwargs)

        # Architecture hyper-parameters, stored under the same attribute names
        # as the constructor arguments.
        architecture = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("intermediate_size", intermediate_size),
            ("max_position_embeddings", max_position_embeddings),
        )
        for attr_name, attr_value in architecture:
            setattr(self, attr_name, attr_value)
36
+
37
class TransformerLM(PreTrainedModel):
    """Character-level causal language model built on ``nn.TransformerEncoder``.

    Token embeddings and learned position embeddings are summed, passed
    through a stack of encoder layers under a causal attention mask, and
    projected back to vocabulary logits.

    NOTE(review): earlier revisions applied no causal mask. Checkpoints
    trained that way could attend to future tokens and should be retrained.
    """

    config_class = TransformerLMConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config

        # Attribute names are part of the checkpoint format (state-dict keys).
        self.embedding = nn.Embedding(config.vocab_size, config.hidden_size)
        self.pos_embedding = nn.Embedding(config.max_position_embeddings, config.hidden_size)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_size,
            nhead=config.num_attention_heads,
            dim_feedforward=config.intermediate_size,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_hidden_layers)
        self.output_layer = nn.Linear(config.hidden_size, config.vocab_size)

        self.max_position_embeddings = config.max_position_embeddings

    def forward(self, input_ids, attention_mask=None, labels=None, position_ids=None, **kwargs):
        """Compute next-token logits and, optionally, the language-modeling loss.

        Args:
            input_ids: Token ids of shape ``(batch, seq_len)``.
            attention_mask: Optional ``(batch, seq_len)`` mask, non-zero for
                real tokens and zero for padding.
            labels: Optional target ids with the same shape as ``input_ids``.
                They are shifted internally so position ``t`` predicts
                ``t + 1``; ``-100`` entries are ignored by the loss.
            position_ids: Optional explicit positions; defaults to
                ``0 .. seq_len - 1``.
            **kwargs: Extra arguments supplied by ``generate()`` (for example
                cache flags). This model has no cache, so they are ignored.

        Returns:
            ``CausalLMOutputWithPast`` with ``logits`` and ``loss`` (``None``
            when no labels are given). Prefer attribute access
            (``out.logits``); fields that are ``None`` are not available as
            dictionary keys.

        Raises:
            ValueError: If ``seq_len`` exceeds ``max_position_embeddings``.
        """
        # Imported here so the fix does not depend on the module's import block.
        from transformers.modeling_outputs import CausalLMOutputWithPast

        seq_len = input_ids.size(1)
        if seq_len > self.max_position_embeddings:
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_position_embeddings "
                f"({self.max_position_embeddings})"
            )
        if position_ids is None:
            position_ids = torch.arange(0, seq_len, device=input_ids.device).unsqueeze(0)

        # Embedding + positional encoding
        hidden = self.embedding(input_ids) * math.sqrt(self.config.hidden_size)
        hidden = hidden + self.pos_embedding(position_ids)

        # Boolean mask, True = "may not attend". Start with the causal part:
        # position t must not see positions after t.
        blocked = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=input_ids.device),
            diagonal=1,
        )
        if attention_mask is not None:
            padded_keys = ~attention_mask.bool()  # (batch, seq_len)
            blocked = blocked.unsqueeze(0) | padded_keys.unsqueeze(1)  # (batch, seq, seq)
            # Always allow self-attention so that no row is fully masked;
            # a fully masked row would turn the softmax into NaNs.
            self_attention = torch.eye(seq_len, dtype=torch.bool, device=input_ids.device)
            blocked = blocked & ~self_attention
            # nn.MultiheadAttention expects one mask per (batch, head) pair.
            blocked = blocked.repeat_interleave(self.config.num_attention_heads, dim=0)

        # Transformer encoder
        output = self.transformer_encoder(hidden, mask=blocked)

        # Output projection
        logits = self.output_layer(output)

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return CausalLMOutputWithPast(loss=loss, logits=logits)

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        """Build the ``forward`` inputs for one ``generate()`` step.

        The model keeps no key/value cache, so the whole sequence is fed on
        every step. Only the most recent ``max_position_embeddings`` tokens
        are kept, because the position table cannot address more.
        """
        window = self.max_position_embeddings
        input_ids = input_ids[:, -window:]

        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            attention_mask = attention_mask[:, -window:]

        position_ids = kwargs.get("position_ids", None)
        if position_ids is not None and position_ids.shape[-1] != input_ids.shape[-1]:
            # Stale positions from an earlier step; forward() recomputes them.
            position_ids = None

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "position_ids": position_ids,
        }
custom_llm_project/sage_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:accd9d82bd55ee686643f9e889f53e3d9938197f30fea126df1b596090c70382
3
+ size 12805265
custom_llm_project/train_model.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import numpy as np
5
+ import json
6
+ import pandas as pd
7
+ from torch.utils.data import Dataset, DataLoader
8
+ import math
9
+
10
+ # Set device
11
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12
+ print(f"Using device: {device}")
13
+
14
+ # Custom Dataset Class
15
class TextDataset(Dataset):
    """Dataset of ``(input_ids, target_ids)`` pairs for next-token prediction.

    Each text is encoded to exactly ``max_length`` ids (truncated or padded by
    the tokenizer). The target is the input shifted left by one position with
    a pad id appended at the end.
    """

    def __init__(self, texts, tokenizer, max_length=128):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        sample = str(self.texts[idx])
        encoded = self.tokenizer.encode(
            sample,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        # The tokenizer returns a batch of one; drop the batch dimension.
        input_ids = encoded.squeeze(0)
        # For language modeling, the target is the input shifted by one.
        trailing_pad = torch.tensor([self.tokenizer.pad_token_id])
        target_ids = torch.cat([input_ids[1:], trailing_pad], dim=0)
        return input_ids, target_ids
32
+
33
+ # Simple Character-level Tokenizer
34
class CharacterTokenizer:
    """Character-level tokenizer with ``<PAD>`` (id 0) and ``<UNK>`` (id 1).

    ``fit`` assigns ids 2, 3, ... to the sorted set of characters seen in the
    training texts. Characters not seen during ``fit`` encode to ``<UNK>``.
    """

    PAD_TOKEN = '<PAD>'
    UNK_TOKEN = '<UNK>'

    def __init__(self):
        self.char_to_idx = {}
        self.idx_to_char = {}
        self.vocab_size = 0
        self.pad_token_id = 0
        self.unk_token_id = 1

    def fit(self, texts):
        """Build the vocabulary from ``texts``, replacing any previous one."""
        # Build vocabulary from characters
        chars = set()
        for text in texts:
            chars.update(str(text))

        # Start from a fresh mapping. Re-fitting must not keep characters from
        # an earlier fit, because their ids would collide with the new ones.
        mapping = {
            self.PAD_TOKEN: self.pad_token_id,
            self.UNK_TOKEN: self.unk_token_id,
        }
        # Regular characters follow the two special tokens, in sorted order.
        for offset, char in enumerate(sorted(chars), start=2):
            mapping[char] = offset

        self.char_to_idx = mapping
        # Create reverse mapping
        self.idx_to_char = {idx: char for char, idx in mapping.items()}
        self.vocab_size = len(mapping)

    def encode(self, text, max_length=None, padding=False, truncation=False, return_tensors=None):
        """Convert one string or a list of strings to lists of token ids.

        Args:
            text: A string or an iterable of strings.
            max_length: Length used for truncation and padding; ignored unless
                ``truncation`` or ``padding`` is set.
            padding: Any truthy value right-pads each sequence to ``max_length``.
            truncation: If truthy, cut each sequence to ``max_length``.
            return_tensors: ``'pt'`` returns a ``torch.long`` tensor of shape
                ``(batch, length)``; all sequences must then share one length.

        Returns:
            A list of id lists (one per input text), or a tensor for ``'pt'``.
        """
        if isinstance(text, str):
            text = [text]

        encoded = []
        for item in text:
            tokens = [self.char_to_idx.get(c, self.unk_token_id) for c in str(item)]
            if truncation and max_length:
                tokens = tokens[:max_length]
            if padding and max_length:
                # A negative count yields no padding for over-long sequences.
                tokens = tokens + [self.pad_token_id] * (max_length - len(tokens))
            encoded.append(tokens)

        if return_tensors == 'pt':
            return torch.tensor(encoded, dtype=torch.long)
        return encoded

    def decode(self, token_ids, skip_special_tokens=False):
        """Convert a flat sequence of token ids back to a string.

        Args:
            token_ids: A 1-D tensor or list of ids.
            skip_special_tokens: If true, drop ``<PAD>`` and ``<UNK>`` ids
                instead of spelling them out. Defaults to ``False``, which
                keeps the original output.

        Returns:
            The decoded string; ids missing from the vocabulary render as
            ``<UNK>`` (or are dropped when ``skip_special_tokens`` is set).
        """
        if isinstance(token_ids, torch.Tensor):
            token_ids = token_ids.tolist()

        special_ids = {self.pad_token_id, self.unk_token_id}
        pieces = []
        for idx in token_ids:
            unknown = idx not in self.idx_to_char
            if skip_special_tokens and (unknown or idx in special_ids):
                continue
            pieces.append(self.UNK_TOKEN if unknown else self.idx_to_char[idx])
        return ''.join(pieces)
82
+
83
+ # Transformer Language Model
84
class TransformerLM(nn.Module):
    """Small causal language model built from ``nn.TransformerEncoder`` layers.

    Token embeddings (scaled by ``sqrt(d_model)``) are added to learned
    position embeddings, passed through the encoder stack under a causal mask,
    and projected to vocabulary logits.
    """

    def __init__(self, vocab_size, d_model=256, nhead=8, num_layers=4, dim_feedforward=1024, max_seq_length=128):
        super().__init__()
        self.d_model = d_model
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_embedding = nn.Embedding(max_seq_length, d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_layer = nn.Linear(d_model, vocab_size)

        self.max_seq_length = max_seq_length

    def forward(self, src):
        """Return next-token logits for a batch of token ids.

        Args:
            src: Long tensor of shape ``(batch, seq_len)``.

        Returns:
            Float tensor of shape ``(batch, seq_len, vocab_size)``. The logits
            at position ``t`` depend only on tokens ``0 .. t``.

        Raises:
            ValueError: If ``seq_len`` exceeds ``max_seq_length`` (the size of
                the position-embedding table).
        """
        seq_len = src.size(1)
        if seq_len > self.max_seq_length:
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_seq_length ({self.max_seq_length})"
            )
        pos = torch.arange(0, seq_len, device=src.device).unsqueeze(0)

        # Embedding + positional encoding
        src_emb = self.embedding(src) * math.sqrt(self.d_model)
        src_emb = src_emb + self.pos_embedding(pos)

        # Causal mask (True = may not attend). Without it every position sees
        # the token it is trained to predict, so the training loss is
        # meaningless. Right-padding needs no extra mask: under a causal mask
        # real tokens never look at the pads that follow them.
        causal_mask = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=src.device),
            diagonal=1,
        )

        # Transformer encoder
        output = self.transformer_encoder(src_emb, mask=causal_mask)

        # Output projection
        logits = self.output_layer(output)
        return logits
116
+
117
def main():
    """Train the character-level model on ``data/dataset.csv`` and save it.

    The checkpoint ``custom_llm_model.pth`` contains the model weights, the
    fitted tokenizer object and the constructor arguments of the model.

    Returns:
        tuple: ``(model, tokenizer)`` after training.

    Raises:
        ValueError: If the dataset contains no text samples.
    """
    # Load dataset
    print("Loading dataset...")
    df = pd.read_csv('data/dataset.csv')
    texts = df['text'].tolist()
    print(f"Loaded {len(texts)} text samples")
    if not texts:
        raise ValueError("data/dataset.csv contains no text samples")

    # Initialize tokenizer
    tokenizer = CharacterTokenizer()
    tokenizer.fit(texts)
    print(f"Vocabulary size: {tokenizer.vocab_size}")

    # One source of truth for the hyper-parameters: the same dict builds the
    # model and is stored in the checkpoint, so the two cannot drift apart.
    model_config = {
        'vocab_size': tokenizer.vocab_size,
        'd_model': 256,
        'nhead': 8,
        'num_layers': 4,
        'dim_feedforward': 1024,
        'max_seq_length': 64
    }

    # Create dataset and dataloader
    dataset = TextDataset(texts, tokenizer, max_length=model_config['max_seq_length'])
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

    # Initialize model
    model = TransformerLM(**model_config).to(device)
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Loss and optimizer; padded target positions do not contribute to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
    optimizer = optim.AdamW(model.parameters(), lr=0.001)

    # Training loop
    num_epochs = 10
    model.train()

    print("Starting training...")
    for epoch in range(num_epochs):
        total_loss = 0
        num_batches = 0

        for batch_idx, (input_ids, target_ids) in enumerate(dataloader):
            input_ids = input_ids.to(device)
            target_ids = target_ids.to(device)

            # Forward pass
            optimizer.zero_grad()
            logits = model(input_ids)

            # Reshape for loss calculation: (batch_size * seq_len, vocab_size)
            loss = criterion(logits.view(-1, logits.size(-1)), target_ids.view(-1))

            # Backward pass
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

            if batch_idx % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx}/{len(dataloader)}], Loss: {loss.item():.4f}')

        avg_loss = total_loss / num_batches
        print(f'Epoch [{epoch+1}/{num_epochs}] Completed - Average Loss: {avg_loss:.4f}')

    # Save model and tokenizer
    print("Saving model and tokenizer...")
    torch.save({
        'model_state_dict': model.state_dict(),
        # The tokenizer is pickled as a Python object, so loading this file
        # needs torch.load(..., weights_only=False) and a trusted source.
        'tokenizer': tokenizer,
        'model_config': model_config
    }, 'custom_llm_model.pth')

    print("Training completed! Model saved as 'custom_llm_model.pth'")
    return model, tokenizer


def generate_text(model, tokenizer, prompt, max_length=50, temperature=0.8):
    """Sample up to ``max_length`` characters that continue ``prompt``.

    Generation stops early after a '.' is produced (when the vocabulary has
    one). If the model exposes ``max_seq_length``, only the most recent
    ``max_seq_length`` tokens are fed to it, so long outputs cannot overflow
    the position-embedding table.

    Args:
        model: Callable mapping ``(1, seq_len)`` ids to
            ``(1, seq_len, vocab)`` logits.
        tokenizer: Object with ``encode``, ``decode`` and ``char_to_idx``.
        prompt: Non-empty text to continue.
        max_length: Maximum number of new tokens.
        temperature: Positive sampling temperature.

    Returns:
        The decoded prompt followed by the generated continuation.

    Raises:
        ValueError: If ``prompt`` is empty or ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model.eval()
    # Run on whatever device the model lives on instead of a module global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')
    window = getattr(model, 'max_seq_length', None)
    # None when '.' is not in the vocabulary; then nothing triggers the stop.
    stop_id = tokenizer.char_to_idx.get('.')

    with torch.no_grad():
        # Tokenize prompt
        generated = tokenizer.encode(prompt, return_tensors='pt').to(target_device)
        if generated.size(1) == 0:
            raise ValueError("prompt must contain at least one character")

        for _ in range(max_length):
            # Feed at most `window` trailing tokens to the model.
            context = generated if window is None else generated[:, -window:]
            logits = model(context)
            next_token_logits = logits[0, -1, :] / temperature

            # Apply softmax to get probabilities
            probs = torch.softmax(next_token_logits, dim=-1)

            # Sample next token
            next_token = torch.multinomial(probs, num_samples=1)

            # Append to generated sequence
            generated = torch.cat([generated, next_token.unsqueeze(0)], dim=1)

            # Stop once a period has been generated
            if stop_id is not None and next_token.item() == stop_id:
                break

    # Decode generated text
    return tokenizer.decode(generated[0])


# Train only when executed as a script, so that importing TransformerLM or
# CharacterTokenizer from this module neither retrains nor overwrites the
# saved checkpoint.
if __name__ == "__main__":
    model, tokenizer = main()

    # Test the model
    print("\nTesting generation:")
    test_prompts = ["Hello", "The weather", "Deep learning"]
    for prompt in test_prompts:
        generated = generate_text(model, tokenizer, prompt, max_length=30)
        print(f"Prompt: '{prompt}' -> Generated: '{generated}'")
data/dataset.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text
2
+ "Hello, how are you today?"
3
+ "The weather is nice and sunny."
4
+ "I enjoy reading books about machine learning."
5
+ "Deep learning has revolutionized artificial intelligence."
6
+ "Natural language processing enables computers to understand text."
7
+ "Transformers are a type of neural network architecture."
8
+ "Training large language models requires significant computational resources."
9
+ "Hugging Face provides a platform for sharing machine learning models."
10
+ "I am excited to build my own custom AI model."
11
+ "Publishing models to the hub allows others to use them."
hf_model/config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "TransformerLM"
4
+ ],
5
+ "model_type": "transformer_lm",
6
+ "vocab_size": 40,
7
+ "hidden_size": 256,
8
+ "num_hidden_layers": 4,
9
+ "num_attention_heads": 8,
10
+ "intermediate_size": 1024,
11
+ "max_position_embeddings": 64,
12
+ "pad_token_id": 0,
13
+ "bos_token_id": 1,
14
+ "eos_token_id": 2,
15
+ "torch_dtype": "float32"
16
+ }
hf_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972f246378970d2307ada18d34ee1834890e8241ffca6aec7c899eb4bc658426
3
+ size 12803875
hf_model/tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23e5006ee164f745f1ed22e8fe7cff1c0139ed66a02f05b52037e906eab257fd
3
+ size 509