update model
- add second layer
- extend h from 64 to 128
- extend bs from 64 to 128
- reduce epochs from 20 to 10
- decoder.pt +2 -2
- inference.py +5 -5
- model.py +9 -9
decoder.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40166a619da9600828596e066ead3b62fc19a20b5329133299611c1862b316a1
|
| 3 |
+
size 1047944
|
inference.py
CHANGED
|
@@ -8,14 +8,14 @@ import torch.nn.functional as F
|
|
| 8 |
class DecoderGRU(nn.Module):
|
| 9 |
def __init__(self, hidden_size, output_size):
|
| 10 |
super(DecoderGRU, self).__init__()
|
| 11 |
-
self.proj = nn.Linear(hidden_size, hidden_size)
|
| 12 |
self.embedding = nn.Embedding(output_size, hidden_size)
|
| 13 |
-
self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
|
| 14 |
self.out = nn.Linear(hidden_size, output_size)
|
| 15 |
|
| 16 |
def forward(self, encoder_sample, target_tensor=None, max_length=16):
|
| 17 |
batch_size = encoder_sample.size(0)
|
| 18 |
-
decoder_hidden = self.proj(encoder_sample).unsqueeze(0)
|
| 19 |
if target_tensor is not None:
|
| 20 |
decoder_input = target_tensor
|
| 21 |
decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
|
|
@@ -38,7 +38,7 @@ class DecoderGRU(nn.Module):
|
|
| 38 |
output = self.out(output)
|
| 39 |
return output, hidden
|
| 40 |
|
| 41 |
-
dec = torch.load('decoder.pt')
|
| 42 |
|
| 43 |
SOS_token = 1
|
| 44 |
EOS_token = 2
|
|
@@ -46,7 +46,7 @@ katakana = list('゠ァアィイゥウェエォオカガキギクグケゲコゴ
|
|
| 46 |
vocab = ['<pad>', '<sos>', '<eos>'] + katakana
|
| 47 |
vocab_dict = {v: k for k, v in enumerate(vocab)}
|
| 48 |
|
| 49 |
-
h=64
|
| 50 |
max_len=40
|
| 51 |
|
| 52 |
def detokenize(tokens):
|
|
|
|
| 8 |
class DecoderGRU(nn.Module):
|
| 9 |
def __init__(self, hidden_size, output_size):
|
| 10 |
super(DecoderGRU, self).__init__()
|
| 11 |
+
self.proj = nn.Linear(hidden_size, 2 * hidden_size)
|
| 12 |
self.embedding = nn.Embedding(output_size, hidden_size)
|
| 13 |
+
self.gru = nn.GRU(hidden_size, hidden_size, num_layers=2, batch_first=True)
|
| 14 |
self.out = nn.Linear(hidden_size, output_size)
|
| 15 |
|
| 16 |
def forward(self, encoder_sample, target_tensor=None, max_length=16):
|
| 17 |
batch_size = encoder_sample.size(0)
|
| 18 |
+
decoder_hidden = self.proj(encoder_sample).view(batch_size, 2, -1).permute(1, 0, 2).contiguous()
|
| 19 |
if target_tensor is not None:
|
| 20 |
decoder_input = target_tensor
|
| 21 |
decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
|
|
|
|
| 38 |
output = self.out(output)
|
| 39 |
return output, hidden
|
| 40 |
|
| 41 |
+
dec = torch.load('decoder.pt', map_location='cpu')
|
| 42 |
|
| 43 |
SOS_token = 1
|
| 44 |
EOS_token = 2
|
|
|
|
| 46 |
vocab = ['<pad>', '<sos>', '<eos>'] + katakana
|
| 47 |
vocab_dict = {v: k for k, v in enumerate(vocab)}
|
| 48 |
|
| 49 |
+
h=128
|
| 50 |
max_len=40
|
| 51 |
|
| 52 |
def detokenize(tokens):
|
model.py
CHANGED
|
@@ -19,11 +19,11 @@ vocab_dict = {v: k for k, v in enumerate(vocab)}
|
|
| 19 |
|
| 20 |
texts = pd.read_csv('rolename.txt', header=None)[0].tolist()
|
| 21 |
vocab_size=len(vocab)
|
| 22 |
-
h=64
|
| 23 |
max_len=40
|
| 24 |
-
bs=64
|
| 25 |
lr=1e-3
|
| 26 |
-
epochs=20
|
| 27 |
|
| 28 |
def tokenize(text):
|
| 29 |
return [vocab_dict[ch] for ch in text]
|
|
@@ -54,9 +54,9 @@ class EncoderVAEBiGRU(nn.Module):
|
|
| 54 |
super(EncoderVAEBiGRU, self).__init__()
|
| 55 |
self.hidden_size = hidden_size
|
| 56 |
self.embedding = nn.Embedding(input_size, hidden_size)
|
| 57 |
-
self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True, bidirectional=True)
|
| 58 |
-
self.proj_mu = nn.Linear(2 * hidden_size, hidden_size)
|
| 59 |
-
self.proj_sigma = nn.Linear(2 * hidden_size, hidden_size)
|
| 60 |
self.dropout = nn.Dropout(dropout_p)
|
| 61 |
self.bn = BatchNormVAE(hidden_size)
|
| 62 |
|
|
@@ -78,14 +78,14 @@ class EncoderVAEBiGRU(nn.Module):
|
|
| 78 |
class DecoderGRU(nn.Module):
|
| 79 |
def __init__(self, hidden_size, output_size):
|
| 80 |
super(DecoderGRU, self).__init__()
|
| 81 |
-
self.proj = nn.Linear(hidden_size, hidden_size)
|
| 82 |
self.embedding = nn.Embedding(output_size, hidden_size)
|
| 83 |
-
self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
|
| 84 |
self.out = nn.Linear(hidden_size, output_size)
|
| 85 |
|
| 86 |
def forward(self, encoder_sample, target_tensor=None, max_length=16):
|
| 87 |
batch_size = encoder_sample.size(0)
|
| 88 |
-
decoder_hidden = self.proj(encoder_sample).unsqueeze(0)
|
| 89 |
if target_tensor is not None:
|
| 90 |
decoder_input = target_tensor
|
| 91 |
decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
|
|
|
|
| 19 |
|
| 20 |
texts = pd.read_csv('rolename.txt', header=None)[0].tolist()
|
| 21 |
vocab_size=len(vocab)
|
| 22 |
+
h=128
|
| 23 |
max_len=40
|
| 24 |
+
bs=128
|
| 25 |
lr=1e-3
|
| 26 |
+
epochs=10
|
| 27 |
|
| 28 |
def tokenize(text):
|
| 29 |
return [vocab_dict[ch] for ch in text]
|
|
|
|
| 54 |
super(EncoderVAEBiGRU, self).__init__()
|
| 55 |
self.hidden_size = hidden_size
|
| 56 |
self.embedding = nn.Embedding(input_size, hidden_size)
|
| 57 |
+
self.gru = nn.GRU(hidden_size, hidden_size, num_layers=2, batch_first=True, bidirectional=True)
|
| 58 |
+
self.proj_mu = nn.Linear(4 * hidden_size, hidden_size)
|
| 59 |
+
self.proj_sigma = nn.Linear(4 * hidden_size, hidden_size)
|
| 60 |
self.dropout = nn.Dropout(dropout_p)
|
| 61 |
self.bn = BatchNormVAE(hidden_size)
|
| 62 |
|
|
|
|
| 78 |
class DecoderGRU(nn.Module):
|
| 79 |
def __init__(self, hidden_size, output_size):
|
| 80 |
super(DecoderGRU, self).__init__()
|
| 81 |
+
self.proj = nn.Linear(hidden_size, 2 * hidden_size)
|
| 82 |
self.embedding = nn.Embedding(output_size, hidden_size)
|
| 83 |
+
self.gru = nn.GRU(hidden_size, hidden_size, num_layers=2, batch_first=True)
|
| 84 |
self.out = nn.Linear(hidden_size, output_size)
|
| 85 |
|
| 86 |
def forward(self, encoder_sample, target_tensor=None, max_length=16):
|
| 87 |
batch_size = encoder_sample.size(0)
|
| 88 |
+
decoder_hidden = self.proj(encoder_sample).view(batch_size, 2, -1).permute(1, 0, 2).contiguous()
|
| 89 |
if target_tensor is not None:
|
| 90 |
decoder_input = target_tensor
|
| 91 |
decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
|