Airin-chan committed on
Commit 5772d49 · verified · 1 Parent(s): 79df5f4

Upload 3 files

Files changed (3)
  1. LCTLM.pth +3 -0
  2. lctlm1.py +95 -0
  3. tokenizer.json +0 -0
LCTLM.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba04a030e9aaa5d1c88def1f8738ec6c465491f8ac91bad3a38e39c4d3df6a23
+ size 176879430
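
The three lines above are only the Git LFS pointer; the actual 176,879,430-byte checkpoint is fetched separately (for example with `git lfs pull`). A minimal sketch, assuming LCTLM.pth has already been downloaded locally, that checks the file against the sha256 oid recorded in the pointer:

import hashlib

def file_sha256(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks so the 170+ MB checkpoint is not read into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "ba04a030e9aaa5d1c88def1f8738ec6c465491f8ac91bad3a38e39c4d3df6a23"
print(file_sha256("LCTLM.pth") == expected)  # True if the download matches the pointer
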
lctlm1.py ADDED
@@ -0,0 +1,95 @@
+ # -*- coding: utf-8 -*-
+ """LCTLM1.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1WtvvYAajPbW2YCEkE5Cg0IT8lKN-lPfk
+ """
+
+ import torch
+ from torch import nn
+ from typing import Optional
+ class LCMBlock(nn.Module):
+     """
+     LCM (Latent Connected Model) block: treats attention as two perceptron paths
+     and increases the result to N multiple magnitude values.
+     """
+
+     def __init__(self, d_model: int, drop_rate: float = 0.1):
+         """
+         Args:
+             d_model : int
+                 dimension of the model
+
+             drop_rate : float
+                 dropout rate
+         """
+         super().__init__()
+         self.step1 = nn.Linear(d_model, d_model)
+         self.step2 = nn.Linear(d_model, d_model)
+         self.magnitude = nn.Linear(d_model, d_model)
+         self.drop = nn.Dropout(drop_rate)
+         self.gelu1 = nn.GELU(approximate='tanh')
+         self.gelu2 = nn.GELU(approximate='tanh')
+         self.tanh = nn.Tanh()
+         self.norm = nn.LayerNorm(d_model)
+
+     def forward(self, x):
+         normx = self.norm(x)
+         step1 = self.step1(normx)    # first perceptron path
+         step1 = self.gelu1(step1)
+         step2 = self.step2(normx)    # second perceptron path
+         step2 = self.gelu2(step2)
+         latent = step1 + step2
+         latent = self.drop(latent)
+         latent = self.magnitude(latent)
+         latent = self.tanh(latent)
+         return x + latent            # residual connection
+
+ class LMLCTBlock(nn.Module):
+     def __init__(self, d_model, drop_rate):
+         super().__init__()
+         self.attention = nn.MultiheadAttention(embed_dim=d_model, num_heads=8, dropout=drop_rate, batch_first=True)
+         self.norm = nn.LayerNorm(d_model)
+         self.lcmblock = LCMBlock(d_model, drop_rate)
+
+
+     def forward(self, x, mask):
+
+         normx = self.norm(x)
+         attention, _ = self.attention(normx, normx, normx, attn_mask=mask)
+         x = x + attention            # residual around self-attention
+         x = self.lcmblock(x)
+         return x
+
+ import math
+ class LMLCT1(nn.Module):
+     def __init__(self, d_model=512, vocab_size=30001, num_layers=6, drop_rate=0.1, maxpos=500):
+         super().__init__()
+         self.d_model = d_model
+         self.embedding = nn.Embedding(vocab_size, d_model, padding_idx=0)
+         self.pos_embedding = nn.Embedding(maxpos, d_model)
+         self.scale = math.sqrt(d_model)
+         self.ffn = nn.Sequential(
+             nn.Linear(d_model, d_model * 4),
+             nn.GELU(),
+             nn.Linear(d_model * 4, d_model),
+         )
+         self.layers = nn.ModuleList([LMLCTBlock(d_model, drop_rate) for _ in range(num_layers)])
+         self.out = nn.Linear(d_model, vocab_size)
+         mask = torch.triu(torch.ones(maxpos, maxpos), diagonal=1).bool()  # True above the diagonal = blocked (causal)
+         self.register_buffer("causal_mask", mask)
+
+     def forward(self, x):
+         B, S = x.size()
+         pos_idx = torch.arange(S, device=x.device)
+         x = self.embedding(x) * self.scale
+         pos = self.pos_embedding(pos_idx).unsqueeze(0)
+         x = x + pos
+         mask = self.causal_mask[:S, :S]
+         for layer in self.layers:
+             x = layer(x, mask)
+         x = self.ffn(x)
+         logits = self.out(x)
+         return logits
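
A quick shape sanity check for the model defined above, using a randomly initialized LMLCT1 with its default hyperparameters (no checkpoint needed). The parameter count this gives, roughly 44M, is consistent with the ~177 MB float32 LCTLM.pth recorded earlier, though that correspondence is an inference rather than something stated in the commit:

import torch
from lctlm1 import LMLCT1

model = LMLCT1()  # d_model=512, vocab_size=30001, num_layers=6, drop_rate=0.1, maxpos=500
print(sum(p.numel() for p in model.parameters()))  # roughly 44M parameters

tokens = torch.randint(1, 30001, (2, 16))  # dummy batch: B=2 sequences of length S=16
with torch.no_grad():
    logits = model(tokens)
print(logits.shape)  # torch.Size([2, 16, 30001]), one logit vector per position
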
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
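
An end-to-end sketch combining the three uploaded files. Two assumptions that the commit does not state: LCTLM.pth holds a plain state_dict matching LMLCT1's default hyperparameters, and tokenizer.json is a Hugging Face `tokenizers` file whose ids stay below vocab_size=30001:

import torch
from tokenizers import Tokenizer
from lctlm1 import LMLCT1

tok = Tokenizer.from_file("tokenizer.json")            # assumes tokenizers-format JSON
model = LMLCT1()
state = torch.load("LCTLM.pth", map_location="cpu")    # assumes a bare state_dict was saved
model.load_state_dict(state)
model.eval()

ids = tok.encode("Hello").ids                          # token ids for the prompt
x = torch.tensor([ids])                                # shape (1, S)
with torch.no_grad():
    next_id = model(x)[0, -1].argmax().item()          # greedy choice of the next token
print(tok.decode([next_id]))
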