USER committed on
Commit
1ebe45d
·
1 Parent(s): 5590223

app is complete

Browse files
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
__pycache__/plot_tokens.cpython-313.pyc ADDED
Binary file (1.33 kB). View file
 
__pycache__/text_dataset.cpython-313.pyc ADDED
Binary file (2.6 kB). View file
 
__pycache__/tokenizer.cpython-313.pyc ADDED
Binary file (2.38 kB). View file
 
__pycache__/usta_causal_attention.cpython-313.pyc ADDED
Binary file (2.33 kB). View file
 
__pycache__/usta_decoder_block.cpython-313.pyc ADDED
Binary file (1.59 kB). View file
 
__pycache__/usta_embedding.cpython-313.pyc ADDED
Binary file (2.47 kB). View file
 
__pycache__/usta_layer_norm.cpython-313.pyc ADDED
Binary file (1.38 kB). View file
 
__pycache__/usta_mlp.cpython-313.pyc ADDED
Binary file (2.29 kB). View file
 
__pycache__/usta_model.cpython-313.pyc ADDED
Binary file (2.57 kB). View file
 
__pycache__/usta_multi_head_attention.cpython-313.pyc ADDED
Binary file (1.8 kB). View file
 
__pycache__/usta_norm.cpython-313.pyc ADDED
Binary file (1.37 kB). View file
 
__pycache__/usta_self_attention.cpython-313.pyc ADDED
Binary file (1.58 kB). View file
 
__pycache__/usta_tokenizer.cpython-313.pyc ADDED
Binary file (2.63 kB). View file
 
app.py CHANGED
@@ -1,70 +1,214 @@
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
 
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
 
19
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- messages.extend(history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- messages.append({"role": "user", "content": message})
24
 
25
- response = ""
 
 
 
 
 
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
 
 
 
 
 
 
 
38
 
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
-
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
+ import os
2
+ from sys import exception
3
+ from gradio.components import clear_button
4
+ from httpx import stream
5
+ import torch
6
  import gradio as gr
7
+ import requests
8
 
9
 
10
+ from v1.usta_model import UstaModel
11
+ from v1.usta_tokenizer import UstaTokenizer
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ model, tokenizer, model_status = None, None, "Not Loaded"
14
+
15
+
16
def load_model(custom_model_path=None):
    """Build a UstaModel and load its weights.

    Args:
        custom_model_path: optional path to a checkpoint; when missing or
            nonexistent, the bundled "v1/u1_model.pth" is used.

    Returns:
        (model, tokenizer, status_message) on success, or
        (None, None, "Error Loading Model: ...") on any failure — this
        function never raises.
    """
    try:
        u_tokenizer = UstaTokenizer("v1/tokenize.json")
        print(f"Tokenizer loaded successfully, vocab size: {len(u_tokenizer.vocab)}")

        # Architecture hyper-parameters must match the training run that
        # produced the checkpoint being loaded.
        context_length = 32
        vocab_size = len(u_tokenizer.vocab)
        embedding_dim = 12
        num_heads = 4
        num_layers = 8

        model = UstaModel(
            vocab_size=vocab_size,
            embedding_dim=embedding_dim,
            num_heads=num_heads,
            context_length=context_length,
            num_layers=num_layers,
        )

        # Prefer a user-supplied checkpoint; fall back to the bundled one.
        # map_location keeps CPU-only hosts (e.g. free Spaces) working even
        # for checkpoints saved on a GPU machine.
        if custom_model_path and os.path.exists(custom_model_path):
            weights_path = custom_model_path
        else:
            weights_path = "v1/u1_model.pth"
        model.load_state_dict(torch.load(weights_path, map_location="cpu"))

        model.eval()
        # Report the actual parameter count (the original printed the vocab
        # size here while claiming it was the parameter count).
        num_params = sum(p.numel() for p in model.parameters())
        print(f"Model loaded successfully, model parameters: {num_params}")

        return model, u_tokenizer, "Model Loaded Successfully"
    except Exception as e:
        return None, None, f"Error Loading Model: {e}"
50
 
 
51
 
52
# Load the default model once at import time so the UI starts ready.
try:
    model, tokenizer, model_status = load_model()

except Exception as e:
    # load_model already catches its own errors and returns an error tuple,
    # so this handler is belt-and-braces for unexpected failures.
    print(f"Error loading model: {e}")
    model, tokenizer, model_status = None, None, "Error Loading Model"

print(f"Model status: {model_status}")

if model is not None:
    print("Model loaded successfully")
63
+
64
def chat_with_model(message, chat_history, max_new_tokens=20):
    """Generate a reply for *message* and append the (user, bot) pair to history.

    Relies on the module-level ``model`` and ``tokenizer`` globals. Any failure
    is reported through the second return value rather than raised.

    Returns:
        (chat_history, textbox_value) — the textbox is cleared on success.
    """
    try:
        # Keep at most the last 25 prompt tokens so the prompt plus the
        # generated tokens fit the 32-token context window.
        token_ids = tokenizer.encode(message)
        if len(token_ids) > 25:
            token_ids = token_ids[-25:]

        with torch.no_grad():
            budget = min(max_new_tokens, 32 - len(token_ids))
            generated = model.generate(token_ids, max_new_tokens=budget)

        response = tokenizer.decode(generated)

        # generate() echoes the prompt; strip it from the front of the reply.
        prompt_text = tokenizer.decode(token_ids.tolist())
        if response.startswith(prompt_text):
            response = response[len(prompt_text):]

        # Drop padding/unknown markers and surrounding whitespace.
        response = response.replace("<pad>", "").replace("<unk>", "").strip()

        print(f"uzunluk {len(response)}")
        if len(response) <= 0:
            response = "I am sorry i dont know the answer to that question"

        chat_history.append((message, response))
        return chat_history, ""

    except Exception as e:
        print(f"Error generating response {e}")
        return chat_history, "Error generating response"
98
+
99
def load_model_from_url(custom_model_url):
    """Download a checkpoint from *custom_model_url* and swap it in.

    Updates the module-level ``model``/``tokenizer``/``model_status`` globals.

    Returns:
        A human-readable status string for the UI status textbox.
    """
    global model, tokenizer, model_status

    try:
        headers = {
            "Accept": "application/octet-stream",
            "User-Agent": "Mozilla5.0 (Windows NT 10.0; Win64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
        }
        # Timeout so a dead host cannot hang the request (the original had none).
        response = requests.get(custom_model_url, headers=headers, timeout=60)
        response.raise_for_status()

        temp_file = "temp_model_pth"
        with open(temp_file, "wb") as f:
            f.write(response.content)

        try:
            model, tokenizer, model_status = load_model(temp_file)
        finally:
            # Clean up the download even if loading raises.
            os.remove(temp_file)

        # load_model swallows its own errors and returns (None, None, msg);
        # the original reported success unconditionally here.
        if model is None:
            return model_status
        return "Model loaded successfully on url"

    except Exception as e:
        print(f"Error loading model from url {e}")
        return "Error loading model from url"
121
+
122
def load_model_from_file(model_file):
    """Load a checkpoint from an uploaded file object and swap it in.

    Args:
        model_file: a gradio file object exposing a ``.name`` path.

    Returns:
        A human-readable status string for the UI status textbox.
    """
    global model, tokenizer, model_status

    try:
        model, tokenizer, model_status = load_model(model_file.name)
        # load_model reports failures via its status string instead of raising;
        # the original claimed success even when model came back as None.
        if model is None:
            return model_status
        return " Model loaded on file"
    except Exception as e:
        print(f"error loading model on file {e}")
        return "Error loading model on file"
131
+
132
# Gradio UI: chat panel plus controls for loading a custom checkpoint.
with gr.Blocks(title="Usta Model") as demo:
    gr.Markdown("# Usta Model")
    gr.Markdown(" Chat with the model")

    chatbot = gr.Chatbot(height=300)
    msg = gr.Textbox(placeholder="Enter your text here...", label="Message")

    with gr.Row():
        send_button = gr.Button("Send", variant="primary")
        clear_button = gr.Button("Clear")

    max_new_tokens = gr.Slider(
        minimum=1,
        maximum=30,
        value=20,
        step=1,
        label="Max New Tokens",
        info="The maximum number of new tokens to generate"
    )

    gr.Markdown("## LOAD CUSTOM MODEL")
    with gr.Row():
        custom_model_url = gr.Textbox(
            placeholder="https://github.com/malibayram/llm-from-scratch/raw/refs/heads/main/u_model_4000.pth",
            label="Custom Model url",
            scale=4
        )

        load_url_button = gr.Button("Load Model", variant="primary", scale=1)

    with gr.Row():
        model_file = gr.File(
            label="Custom Model File",
            file_types=[".pth", ".pt", ".bin"],
        )

        load_file_button = gr.Button("Load Model", variant="primary")

    status = gr.Textbox(
        label="Model Status",
        value=model_status,
        interactive=False,
    )

    def send_message(message, chat_history, max_new_tokens):
        # Ignore empty/whitespace-only submissions; otherwise delegate.
        if not message.strip():
            return chat_history, ""

        return chat_with_model(message, chat_history, max_new_tokens)

    send_button.click(
        send_message,
        inputs=[msg, chatbot, max_new_tokens],
        outputs=[chatbot, msg]
    )

    msg.submit(
        send_message,
        inputs=[msg, chatbot, max_new_tokens],
        outputs=[chatbot, msg]
    )

    # BUG FIX: the original passed `status` as a fourth positional argument,
    # which lands in Button.click's `api_name` parameter — it was never a
    # valid output. Clearing only resets the chat panel.
    clear_button.click(lambda: None, None, chatbot)

    load_url_button.click(
        load_model_from_url,
        inputs=[custom_model_url],
        outputs=[status]
    )

    load_file_button.click(
        load_model_from_file,
        inputs=[model_file],
        outputs=[status]
    )


if __name__ == "__main__":
    demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio >= 5.33.1
2
+ torch >= 2.7.1
3
+ requests >= 2.32.4
4
+ pydantic == 2.10.6
v1/__pycache__/usta_decoder_block.cpython-313.pyc ADDED
Binary file (1.6 kB). View file
 
v1/__pycache__/usta_embedding.cpython-313.pyc ADDED
Binary file (2.51 kB). View file
 
v1/__pycache__/usta_layer_norm.cpython-313.pyc ADDED
Binary file (1.39 kB). View file
 
v1/__pycache__/usta_mlp.cpython-313.pyc ADDED
Binary file (2.3 kB). View file
 
v1/__pycache__/usta_model.cpython-313.pyc ADDED
Binary file (2.55 kB). View file
 
v1/__pycache__/usta_multi_head_attention.cpython-313.pyc ADDED
Binary file (1.81 kB). View file
 
v1/__pycache__/usta_tokenizer.cpython-313.pyc ADDED
Binary file (2.64 kB). View file
 
v1/tokenize.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "the": 0,
4
+ "capital": 1,
5
+ "of": 2,
6
+ "united": 3,
7
+ "state": 4,
8
+ "is": 5,
9
+ "not": 6,
10
+ "london": 7,
11
+ "france": 8,
12
+ "paris": 9,
13
+ "and": 10,
14
+ "berlin": 11,
15
+ "germany": 12,
16
+ "rome": 13,
17
+ "in": 14,
18
+ "italy": 15,
19
+ "madrid": 16,
20
+ "spain": 17,
21
+ "lisbon": 18,
22
+ "portugal": 19,
23
+ "kingdom": 20,
24
+ "washington": 21,
25
+ "although": 22,
26
+ "these": 23,
27
+ "place": 24,
28
+ "are": 25,
29
+ "often": 26,
30
+ "mention": 27,
31
+ "together": 28,
32
+ "each": 29,
33
+ "country": 30,
34
+ "has": 31,
35
+ "its": 32,
36
+ "own": 33,
37
+ "identity": 34,
38
+ "any": 35,
39
+ "european": 36,
40
+ "city": 37,
41
+ "remain": 38,
42
+ "important": 39,
43
+ "with": 40,
44
+ "a": 41,
45
+ "rich": 42,
46
+ "history": 43,
47
+ "culture": 44,
48
+ "europe": 45,
49
+ "made": 46,
50
+ "many": 47,
51
+ "unique": 48,
52
+ "world": 49,
53
+ "while": 50,
54
+ "known": 51,
55
+ "for": 52,
56
+ "art": 53,
57
+ "fashion": 54,
58
+ "famous": 55,
59
+ "they": 56,
60
+ "ed": 57,
61
+ "s": 58,
62
+ ".": 59,
63
+ ",": 60,
64
+ " ": 61,
65
+ "<unk>": 62,
66
+ "<pad>": 63
67
+ }
v1/u1_model.pth ADDED
Binary file (95.9 kB). View file
 
v1/u_model.pth ADDED
Binary file (97.2 kB). View file
 
v1/u_model2.pth ADDED
Binary file (97.2 kB). View file
 
v1/usta_causal_attention.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class UstaCausalAttention(nn.Module):
    """Single-head causal self-attention for unbatched (seq_len, dim) input.

    Inputs longer than ``context_length`` are truncated from the front of the
    sequence before attention is computed.
    """

    def __init__(self, embedding_dim, output_dim, context_length, dropout_rate=0):
        super().__init__()
        self.embedding_dim = embedding_dim

        # Separate bias-free projections for queries, keys and values.
        self.q_weights = nn.Linear(embedding_dim, output_dim, bias=False)
        self.k_weights = nn.Linear(embedding_dim, output_dim, bias=False)
        self.v_weights = nn.Linear(embedding_dim, output_dim, bias=False)
        self.dropout = nn.Dropout(dropout_rate)
        # Lower-triangular mask: position i may only attend to positions <= i.
        self.register_buffer("mask", torch.tril(torch.ones(context_length, context_length)))
        self.context_length = context_length

    def forward(self, x):
        # Token count is captured before truncation (as in the original);
        # slicing the mask clamps to the mask's own size anyway.
        seq_len = x.shape[0]
        x = x[:self.context_length]

        q = self.q_weights(x)
        k = self.k_weights(x)
        v = self.v_weights(x)

        scores = q @ k.T
        # Forbid attending to future positions.
        scores = scores.masked_fill(
            self.mask.bool()[:seq_len, :seq_len] == 0, -torch.inf
        )
        weights = torch.softmax(scores / k.shape[-1] ** 0.5, dim=1)
        weights = self.dropout(weights)
        return weights @ v
31
+
v1/usta_decoder_block.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+
3
+ from .usta_multi_head_attention import UstaMultiHeadAttention
4
+
5
+ from .usta_layer_norm import UstaLayerNorm
6
+
7
+ from .usta_mlp import UstaMLP
8
+
9
class UstaDecoderBlock(nn.Module):
    """One decoder block: masked multi-head self-attention with normalisation.

    NOTE(review): although ``self.mlp`` is constructed, ``forward`` never
    calls it, and the second "residual" step adds ``norm2(x)`` to itself
    (effectively ``x = 2 * norm2(x)``). This looks like a bug, but the bundled
    checkpoints were presumably trained with this exact forward pass, so the
    behaviour is documented rather than changed — confirm before fixing.
    """

    def __init__(self, embedding_dim, num_heads, context_length):
        super().__init__()

        # dropout_rate=0.5 only takes effect in train() mode.
        self.self_attention = UstaMultiHeadAttention(embedding_dim, embedding_dim, context_length, num_heads, dropout_rate=0.5)
        self.norm1 = UstaLayerNorm(embedding_dim)
        self.mlp = UstaMLP(embedding_dim, embedding_dim)  # NOTE(review): never used in forward
        self.norm2 = UstaLayerNorm(embedding_dim)

    def forward(self, x):
        # Pre-norm copy of the input, used as the residual below.
        res = self.norm1(x)

        x = self.self_attention(x)
        x = self.norm1(x)


        x = x + res

        res = x  # dead store: immediately overwritten on the next line

        res = self.norm2(x)
        x = self.norm2(x)

        # Equivalent to x = 2 * norm2(x); the MLP branch is skipped entirely.
        x = x + res

        return x
35
+
36
+
37
+
38
+
39
+
40
+
41
+
v1/usta_embedding.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+
8
def get_rotary_position_encoding(input: torch.Tensor, base=10000, device="cpu"):
    """Apply rotary position encoding to a (context_length, dimension) tensor.

    Channel i of the first half is paired with channel i of the second half,
    and each pair is rotated by an angle proportional to the token position.
    Row 0 (position 0) is left unchanged by construction.
    """
    seq_len, dim = input.shape

    assert dim % 2 == 0, "dimension must be even"

    half = dim // 2

    # Per-pair inverse frequencies. Note: the exponent divides by the full
    # dimension (matching the original implementation and its checkpoints).
    channel_idx = torch.arange(0, half, device=device, dtype=torch.float32)
    inv_freq = 1.0 / (base ** (channel_idx / dim))

    # One rotation angle per (position, channel-pair).
    positions = torch.arange(0, seq_len, device=device, dtype=torch.float32).unsqueeze(1)
    angles = positions * inv_freq

    cos_angles = torch.cos(angles)
    sin_angles = torch.sin(angles)

    # Pairing scheme: first half of channels with second half (not interleaved).
    first_half = input[:, :half]
    second_half = input[:, half:]

    rotated = torch.empty_like(input)
    rotated[:, :half] = first_half * cos_angles - second_half * sin_angles
    rotated[:, half:] = first_half * sin_angles + second_half * cos_angles

    return rotated
39
+
40
+
41
class UstaEmbedding(nn.Module):
    """Token embedding followed by rotary position encoding.

    ``context_length`` is accepted for interface compatibility but is not
    used by this module.
    """

    def __init__(self, vocab_size, embedding_dim, context_length):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Positional information is injected by rotating the embedded vectors.
        self.get_pos = get_rotary_position_encoding

    def forward(self, x):
        embedded = self.embedding(x)
        return self.get_pos(embedded)
51
+
52
+
53
+
v1/usta_layer_norm.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class UstaLayerNorm(nn.Module):
    """Layer normalisation over the last dimension with a learnable gain and no bias."""

    def __init__(self, embedding_dim, eps=1e-5):
        super().__init__()
        # Numerical-stability floor added to the variance before the sqrt.
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(embedding_dim))

    def forward(self, x):
        # Biased variance (unbiased=False) matches nn.LayerNorm's convention.
        mu = x.mean(dim=-1, keepdim=True)
        var = x.var(dim=-1, keepdim=True, unbiased=False)
        return self.weight * ((x - mu) / torch.sqrt(var + self.eps))
18
+
19
+
v1/usta_mlp.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.functional as F
3
+ import torch.nn as nn
4
+
5
+
6
class GELU(nn.Module):
    """Tanh approximation of the Gaussian Error Linear Unit activation."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        inner = torch.sqrt(torch.tensor(2 / torch.pi)) * (x + 0.044715 * torch.pow(x, 3))
        return 0.5 * x * (1 + torch.tanh(inner))
16
+
17
+
18
class UstaMLP(nn.Module):
    """Gated feed-forward block: down_proj( gelu(gate_proj(x)) * up_proj(x) )."""

    def __init__(self, embedding_dim, hidden_dim):
        super().__init__()

        self.gate_proj = nn.Linear(embedding_dim, hidden_dim)
        self.up_proj = nn.Linear(embedding_dim, hidden_dim)
        self.down_proj = nn.Linear(hidden_dim, embedding_dim)
        self.gelu = GELU()

    def forward(self, x):
        # The activated gate modulates the linear "up" branch elementwise.
        activated_gate = self.gelu(self.gate_proj(x))
        return self.down_proj(activated_gate * self.up_proj(x))
v1/usta_model.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from .usta_decoder_block import UstaDecoderBlock
5
+ from .usta_embedding import UstaEmbedding
6
+
7
+
8
class UstaModel(nn.Module):
    """Minimal decoder-only language model: embedding -> decoder stack -> LM head."""

    def __init__(self, vocab_size, embedding_dim, num_heads, context_length, num_layers):
        super().__init__()

        self.embedding = UstaEmbedding(vocab_size, embedding_dim, context_length)
        # num_layers identical decoder blocks applied in sequence.
        self.layers = nn.Sequential(
            *[UstaDecoderBlock(embedding_dim, num_heads, context_length) for _ in range(num_layers)]
        )

        self.lm_head = nn.Linear(embedding_dim, vocab_size)

    def forward(self, x: torch.Tensor):
        # x appears to be an unbatched 1-D tensor of token ids (generate()
        # feeds it a flat sequence) — TODO confirm no batched callers exist.
        x = self.embedding(x)  # dictionary meaning of the tokens (words)

        x = self.layers(x)
        x = self.lm_head(x)  # per-position logits over the vocabulary

        return x

    # Dead example snippet left in the class body — a bare string statement
    # with no runtime effect; kept byte-identical.
    """ out = u_model(torch.tensor(new_tokens))

    probs = torch.softmax(out[-1], dim=-1)
    max_prob, max_index = torch.max(probs, dim=-1)
    max_prob, max_index, probs
    """

    def generate(self, x: torch.Tensor, max_new_tokens: int):  # top_k, top_p, temperature
        """Greedy decoding: repeatedly append the argmax next token.

        Returns the full token sequence (prompt + generated) as a plain list.
        """
        tokens = x.detach().cpu().numpy().tolist()

        for _ in range(max_new_tokens):
            out = self.forward(x)
            # Greedy choice from the distribution at the last position.
            probs = torch.softmax(out[-1], dim=-1)
            _, max_index = torch.max(probs, dim=-1)
            tokens.append(max_index.item())
            # NOTE(review): id 59 is "." in v1/tokenize.json, not a dedicated
            # <eos> token as the comment claims — confirm the intended stop id.
            if max_index == 59 or len(tokens) > 32:  # <eos> and max context length
                break

            # Re-run the model on the grown sequence next iteration.
            x = torch.tensor(tokens)

        return tokens
49
+
v1/usta_multi_head_attention.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ import torch.nn as nn
4
+
5
+
6
class UstaMultiHeadAttention(nn.Module):
    """Causal multi-head self-attention wrapping torch's nn.MultiheadAttention.

    Operates on unbatched (seq_len, embedding_dim) input; sequences longer
    than ``context_length`` are truncated from the front.
    """

    def __init__(self, embedding_dim, output_dim, context_length, num_heads, dropout_rate=0):
        super().__init__()

        self.context_length = context_length

        self.multi_head_attention = nn.MultiheadAttention(embedding_dim, num_heads, dropout=dropout_rate)

        self.projection = nn.Linear(embedding_dim, output_dim)

        # Boolean upper-triangular mask: True marks (query, key) pairs that
        # must NOT attend (i.e. future positions).
        self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1).bool())

    def forward(self, x):
        # Token count is read before truncation (as in the original); the mask
        # slice clamps to the buffer's own size regardless.
        seq_len = x.shape[0]
        x = x[:self.context_length]
        causal_mask = self.mask[:seq_len, :seq_len]
        # Self-attention: the same tensor serves as query, key and value.
        attended, _ = self.multi_head_attention(x, x, x, attn_mask=causal_mask)
        return self.projection(attended)
30
+
31
+
v1/usta_self_attention.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class UstaSelfAttention(nn.Module):
    """Single-head (non-causal) scaled dot-product self-attention."""

    def __init__(self, embedding_dim, output_dim):
        super().__init__()
        self.embedding_dim = embedding_dim

        # Bias-free query/key/value projections.
        self.q_weights = nn.Linear(embedding_dim, output_dim, bias=False)
        self.k_weights = nn.Linear(embedding_dim, output_dim, bias=False)
        self.v_weights = nn.Linear(embedding_dim, output_dim, bias=False)

    def forward(self, x):
        q = self.q_weights(x)
        k = self.k_weights(x)
        v = self.v_weights(x)

        # Scale by sqrt(d_k) before the row-wise softmax.
        scores = q @ k.T
        attention_weights = torch.softmax(scores / k.shape[-1] ** 0.5, dim=1)
        return attention_weights @ v
21
+
v1/usta_tokenizer.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import torch
3
+
4
+
5
class UstaTokenizer:
    """Greedy longest-match word tokenizer backed by a JSON vocabulary file."""

    def __init__(self, vocab_file):
        # vocab: token string -> id; reverse_vocab: id -> token string.
        with open(vocab_file, "r") as f:
            self.vocab = json.load(f)
        self.reverse_vocab = {v: k for k, v in self.vocab.items()}

    def encode(self, text):
        """Encode *text* into a 1-D tensor of token ids.

        Each whitespace-separated word is consumed by greedy longest-prefix
        matching against the vocab; characters with no match become <unk>.
        Words are joined with the explicit " " token.
        """
        tokens = []
        for word in text.split():
            i = 0
            while i < len(word):
                found_match = False
                # Try the longest remaining substring first, shrinking rightward.
                for j in range(len(word), i, -1):
                    sub_word = word[i:j]
                    if sub_word in self.vocab:
                        tokens.append(self.vocab[sub_word])
                        i = j
                        found_match = True
                        break
                if not found_match:
                    # No vocab entry covers this character: emit <unk>, advance one.
                    tokens.append(self.vocab["<unk>"])
                    i += 1
            tokens.append(self.vocab[" "])
        # Drop the trailing separator. Guarded: the original popped
        # unconditionally and raised IndexError on empty input.
        if tokens:
            tokens.pop()
        return torch.tensor(tokens)

    def tokenize(self, text):
        """Return the token strings (not ids) for *text*."""
        token_ids = self.encode(text)
        # .tolist() replaces the original's needless numpy round-trip.
        return [self.reverse_vocab[token_id] for token_id in token_ids.tolist()]

    def decode(self, ids):
        """Concatenate the token strings for a sequence of ids."""
        text = ""
        for token_id in ids:
            text += self.reverse_vocab[token_id]
        return text
47
+