Sunbread committed on
Commit
ffb7d49
·
1 Parent(s): d17b8f3

update model

Browse files

- add second layer
- extend h from 64 to 128
- extend bs from 64 to 128
- reduce epochs from 20 to 10

Files changed (3) hide show
  1. decoder.pt +2 -2
  2. inference.py +5 -5
  3. model.py +9 -9
decoder.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6574bd2e0f77d393da6412bd11886c176e551dce94f4383b3bf81a5e1a61d745
3
- size 180232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40166a619da9600828596e066ead3b62fc19a20b5329133299611c1862b316a1
3
+ size 1047944
inference.py CHANGED
@@ -8,14 +8,14 @@ import torch.nn.functional as F
8
  class DecoderGRU(nn.Module):
9
  def __init__(self, hidden_size, output_size):
10
  super(DecoderGRU, self).__init__()
11
- self.proj = nn.Linear(hidden_size, hidden_size)
12
  self.embedding = nn.Embedding(output_size, hidden_size)
13
- self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
14
  self.out = nn.Linear(hidden_size, output_size)
15
 
16
  def forward(self, encoder_sample, target_tensor=None, max_length=16):
17
  batch_size = encoder_sample.size(0)
18
- decoder_hidden = self.proj(encoder_sample).unsqueeze(0)
19
  if target_tensor is not None:
20
  decoder_input = target_tensor
21
  decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
@@ -38,7 +38,7 @@ class DecoderGRU(nn.Module):
38
  output = self.out(output)
39
  return output, hidden
40
 
41
- dec = torch.load('decoder.pt').to('cpu')
42
 
43
  SOS_token = 1
44
  EOS_token = 2
@@ -46,7 +46,7 @@ katakana = list('゠ァアィイゥウェエォオカガキギクグケゲコゴ
46
  vocab = ['<pad>', '<sos>', '<eos>'] + katakana
47
  vocab_dict = {v: k for k, v in enumerate(vocab)}
48
 
49
- h=64
50
  max_len=40
51
 
52
  def detokenize(tokens):
 
8
  class DecoderGRU(nn.Module):
9
  def __init__(self, hidden_size, output_size):
10
  super(DecoderGRU, self).__init__()
11
+ self.proj = nn.Linear(hidden_size, 2 * hidden_size)
12
  self.embedding = nn.Embedding(output_size, hidden_size)
13
+ self.gru = nn.GRU(hidden_size, hidden_size, num_layers=2, batch_first=True)
14
  self.out = nn.Linear(hidden_size, output_size)
15
 
16
  def forward(self, encoder_sample, target_tensor=None, max_length=16):
17
  batch_size = encoder_sample.size(0)
18
+ decoder_hidden = self.proj(encoder_sample).view(batch_size, 2, -1).permute(1, 0, 2).contiguous()
19
  if target_tensor is not None:
20
  decoder_input = target_tensor
21
  decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
 
38
  output = self.out(output)
39
  return output, hidden
40
 
41
+ dec = torch.load('decoder.pt', map_location='cpu')
42
 
43
  SOS_token = 1
44
  EOS_token = 2
 
46
  vocab = ['<pad>', '<sos>', '<eos>'] + katakana
47
  vocab_dict = {v: k for k, v in enumerate(vocab)}
48
 
49
+ h=128
50
  max_len=40
51
 
52
  def detokenize(tokens):
model.py CHANGED
@@ -19,11 +19,11 @@ vocab_dict = {v: k for k, v in enumerate(vocab)}
19
 
20
  texts = pd.read_csv('rolename.txt', header=None)[0].tolist()
21
  vocab_size=len(vocab)
22
- h=64
23
  max_len=40
24
- bs=64
25
  lr=1e-3
26
- epochs=20
27
 
28
  def tokenize(text):
29
  return [vocab_dict[ch] for ch in text]
@@ -54,9 +54,9 @@ class EncoderVAEBiGRU(nn.Module):
54
  super(EncoderVAEBiGRU, self).__init__()
55
  self.hidden_size = hidden_size
56
  self.embedding = nn.Embedding(input_size, hidden_size)
57
- self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True, bidirectional=True)
58
- self.proj_mu = nn.Linear(2 * hidden_size, hidden_size)
59
- self.proj_sigma = nn.Linear(2 * hidden_size, hidden_size)
60
  self.dropout = nn.Dropout(dropout_p)
61
  self.bn = BatchNormVAE(hidden_size)
62
 
@@ -78,14 +78,14 @@ class EncoderVAEBiGRU(nn.Module):
78
  class DecoderGRU(nn.Module):
79
  def __init__(self, hidden_size, output_size):
80
  super(DecoderGRU, self).__init__()
81
- self.proj = nn.Linear(hidden_size, hidden_size)
82
  self.embedding = nn.Embedding(output_size, hidden_size)
83
- self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
84
  self.out = nn.Linear(hidden_size, output_size)
85
 
86
  def forward(self, encoder_sample, target_tensor=None, max_length=16):
87
  batch_size = encoder_sample.size(0)
88
- decoder_hidden = self.proj(encoder_sample).unsqueeze(0)
89
  if target_tensor is not None:
90
  decoder_input = target_tensor
91
  decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
 
19
 
20
  texts = pd.read_csv('rolename.txt', header=None)[0].tolist()
21
  vocab_size=len(vocab)
22
+ h=128
23
  max_len=40
24
+ bs=128
25
  lr=1e-3
26
+ epochs=10
27
 
28
  def tokenize(text):
29
  return [vocab_dict[ch] for ch in text]
 
54
  super(EncoderVAEBiGRU, self).__init__()
55
  self.hidden_size = hidden_size
56
  self.embedding = nn.Embedding(input_size, hidden_size)
57
+ self.gru = nn.GRU(hidden_size, hidden_size, num_layers=2, batch_first=True, bidirectional=True)
58
+ self.proj_mu = nn.Linear(4 * hidden_size, hidden_size)
59
+ self.proj_sigma = nn.Linear(4 * hidden_size, hidden_size)
60
  self.dropout = nn.Dropout(dropout_p)
61
  self.bn = BatchNormVAE(hidden_size)
62
 
 
78
  class DecoderGRU(nn.Module):
79
  def __init__(self, hidden_size, output_size):
80
  super(DecoderGRU, self).__init__()
81
+ self.proj = nn.Linear(hidden_size, 2 * hidden_size)
82
  self.embedding = nn.Embedding(output_size, hidden_size)
83
+ self.gru = nn.GRU(hidden_size, hidden_size, num_layers=2, batch_first=True)
84
  self.out = nn.Linear(hidden_size, output_size)
85
 
86
  def forward(self, encoder_sample, target_tensor=None, max_length=16):
87
  batch_size = encoder_sample.size(0)
88
+ decoder_hidden = self.proj(encoder_sample).view(batch_size, 2, -1).permute(1, 0, 2).contiguous()
89
  if target_tensor is not None:
90
  decoder_input = target_tensor
91
  decoder_outputs, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)