AlexSychovUN committed
Commit 229e134 · 1 Parent(s): 077b6c3

Added files

pinns_practice/basic_pinn.py CHANGED
@@ -2,28 +2,32 @@ import torch
 import torch.nn as nn
 import matplotlib.pyplot as plt
 
+
 class BasicPINN(nn.Module):
     def __init__(self):
         super().__init__()
         self.net = nn.Sequential(
             nn.Linear(1, 20),
-            nn.Tanh(), # for RELU 2 derivative is always 0, so use Tanh
+            nn.Tanh(),  # ReLU's second derivative is always 0, so use Tanh
             nn.Linear(20, 20),
             nn.Tanh(),
-            nn.Linear(20, 1)
+            nn.Linear(20, 1),
         )
 
     def forward(self, x):
-        x =self.net(x)
+        x = self.net(x)
         return x
 
+
 model = BasicPINN()
 optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
 
 for epoch in range(2000):
     optimizer.zero_grad()
 
-    t_physics = torch.rand(100, 1).requires_grad_(True) # requires_grad=True important for derivatives
+    t_physics = torch.rand(100, 1).requires_grad_(
+        True
+    )  # requires_grad=True is important for computing derivatives
 
     y_pred = model(t_physics)
 
@@ -33,10 +37,12 @@ for epoch in range(2000):
     # Calculating the derivative dy/dt
     # We use PyTorch auto-differentiation to find the rate of change of y_pred with respect to t_physics.
     dy_dt = torch.autograd.grad(
-        outputs=y_pred, # What we differentiate, y
-        inputs=t_physics, # What we differentiate with respect to, (time, t)
-        grad_outputs=torch.ones_like(y_pred), # vector from 1, for 100 examples, calculates gradients independently
-        create_graph=True, # history of calculations, critical for PINNs
+        outputs=y_pred,  # what we differentiate: y
+        inputs=t_physics,  # what we differentiate with respect to: time t
+        grad_outputs=torch.ones_like(
+            y_pred
+        ),  # vector of ones, so gradients for the 100 samples are computed independently
+        create_graph=True,  # keep the computation graph; critical for PINNs, since the loss is differentiated again
     )[0]
 
     # Physical loss: dy/dt + 2y = 0
@@ -60,7 +66,9 @@ with torch.no_grad():
 
     y_exact = torch.exp(-2 * t_test)
-    plt.plot(t_test.numpy(), y_test_pred.numpy(), label="PINN model", color="red", linestyle="--")
+    plt.plot(
+        t_test.numpy(), y_test_pred.numpy(), label="PINN model", color="red", linestyle="--"
+    )
     plt.plot(t_test.numpy(), y_exact.numpy(), label="Exact solution (Math)", alpha=0.5)
     plt.legend()
     plt.title("Solving the differential equation!!")
transformer_from_scratch/dataset.py ADDED
@@ -0,0 +1,74 @@
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset
+
+
+class BilingualDataset(Dataset):
+    def __init__(self, ds, tokenizer_src, tokenizer_tgt, src_lang, tgt_lang, seq_len):
+        super().__init__()
+
+        self.ds = ds
+        self.tokenizer_src = tokenizer_src
+        self.tokenizer_tgt = tokenizer_tgt
+        self.src_lang = src_lang
+        self.tgt_lang = tgt_lang
+        self.seq_len = seq_len
+
+        self.sos_token = torch.tensor([tokenizer_src.token_to_id('[SOS]')], dtype=torch.int64)  # token_to_id takes a string
+        self.eos_token = torch.tensor([tokenizer_src.token_to_id('[EOS]')], dtype=torch.int64)
+        self.pad_token = torch.tensor([tokenizer_src.token_to_id('[PAD]')], dtype=torch.int64)
+
+    def __len__(self):
+        return len(self.ds)
+
+    def __getitem__(self, index):
+        src_target_pair = self.ds[index]
+        src_text = src_target_pair['translation'][self.src_lang]
+        tgt_text = src_target_pair['translation'][self.tgt_lang]
+
+        enc_input_tokens = self.tokenizer_src.encode(src_text).ids
+        dec_input_tokens = self.tokenizer_tgt.encode(tgt_text).ids
+
+        enc_num_padding_tokens = self.seq_len - len(enc_input_tokens) - 2  # room for SOS and EOS
+        dec_num_padding_tokens = self.seq_len - len(dec_input_tokens) - 1  # room for SOS only
+
+        if enc_num_padding_tokens < 0 or dec_num_padding_tokens < 0:
+            raise ValueError('Sentence is too long')
+
+        # Add SOS and EOS tokens to the source text
+        encoder_input = torch.cat(
+            [
+                self.sos_token,
+                torch.tensor(enc_input_tokens, dtype=torch.int64),
+                self.eos_token,
+                torch.tensor([self.pad_token] * enc_num_padding_tokens, dtype=torch.int64),
+            ]
+        )
+        # Add the SOS token to the decoder input
+        decoder_input = torch.cat(
+            [
+                self.sos_token,
+                torch.tensor(dec_input_tokens, dtype=torch.int64),
+                torch.tensor([self.pad_token] * dec_num_padding_tokens, dtype=torch.int64),
+            ]
+        )
+        # Add the EOS token to the label (what we expect the model to predict)
+        label = torch.cat(
+            [
+                torch.tensor(dec_input_tokens, dtype=torch.int64),
+                self.eos_token,
+                torch.tensor([self.pad_token] * dec_num_padding_tokens, dtype=torch.int64),
+            ]
+        )
+
+        assert encoder_input.size(0) == self.seq_len
+        assert decoder_input.size(0) == self.seq_len
+        assert label.size(0) == self.seq_len
+
+        return {
+            "encoder_input": encoder_input,  # (seq_len)
+            "decoder_input": decoder_input,  # (seq_len)
+            "encoder_mask": (encoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int(),  # (1, 1, seq_len)
+            "decoder_mask": (decoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int() & causal_mask(decoder_input.size(0)),  # (1, 1, seq_len) & (1, seq_len, seq_len)
+            "label": label,  # (seq_len)
+        }
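
`causal_mask` is used in `__getitem__` above but is neither defined nor imported in this file, so the dataset raises a `NameError` as committed. A minimal sketch of the standard implementation it presumably relies on (where it would live in this repo is an assumption):

```python
import torch

def causal_mask(size):
    # Ones strictly above the diagonal mark the "future" positions.
    mask = torch.triu(torch.ones((1, size, size)), diagonal=1).int()
    return mask == 0  # True where attention is allowed: self and earlier positions

print(causal_mask(3))
# tensor([[[ True, False, False],
#          [ True,  True, False],
#          [ True,  True,  True]]])
```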
transformer_from_scratch/model.py CHANGED
@@ -174,20 +174,36 @@ class Encoder(nn.Module):
 
 
 class DecoderBlock(nn.Module):
-    def __init__(self, self_attention_block: MultiHeadAttention, cross_attention_block: MultiHeadAttention, feed_forward_block: FeedForwardBlock, dropout: float):
+    def __init__(
+        self,
+        self_attention_block: MultiHeadAttention,
+        cross_attention_block: MultiHeadAttention,
+        feed_forward_block: FeedForwardBlock,
+        dropout: float,
+    ):
         super().__init__()
         self.self_attention_block = self_attention_block
         self.cross_attention_block = cross_attention_block
         self.feed_forward_block = feed_forward_block
-        self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(3)])
+        self.residual_connections = nn.ModuleList(
+            [ResidualConnection(dropout) for _ in range(3)]
+        )
 
     # x: decoder input; src_mask: mask for the encoder output; tgt_mask: mask applied in the decoder
     def forward(self, x, encoder_output, src_mask, tgt_mask):
-        x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, tgt_mask))
-        x = self.residual_connections[1](x, lambda x: self.cross_attention_block(x, encoder_output, encoder_output, src_mask))
+        x = self.residual_connections[0](
+            x, lambda x: self.self_attention_block(x, x, x, tgt_mask)
+        )
+        x = self.residual_connections[1](
+            x,
+            lambda x: self.cross_attention_block(
+                x, encoder_output, encoder_output, src_mask
+            ),
+        )
         x = self.residual_connections[2](x, self.feed_forward_block)
         return x
 
+
 class Decoder(nn.Module):
     def __init__(self, layers: nn.ModuleList):
         super().__init__()
@@ -210,9 +226,17 @@ class ProjectionLayer(nn.Module):
         return torch.log_softmax(self.proj(x), dim=-1)
 
 
-
 class Transformer(nn.Module):
-    def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer):
+    def __init__(
+        self,
+        encoder: Encoder,
+        decoder: Decoder,
+        src_embed: InputEmbeddings,
+        tgt_embed: InputEmbeddings,
+        src_pos: PositionalEncoding,
+        tgt_pos: PositionalEncoding,
+        projection_layer: ProjectionLayer,
+    ):
         super().__init__()
         self.encoder = encoder
         self.decoder = decoder
@@ -236,7 +260,17 @@ class Transformer(nn.Module):
         return self.projection_layer(x)
 
 
-def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int = 512, N: int = 6, h: int = 8, dropout: int = 0.1, d_ff: int = 2048):
+def build_transformer(
+    src_vocab_size: int,
+    tgt_vocab_size: int,
+    src_seq_len: int,
+    tgt_seq_len: int,
+    d_model: int = 512,
+    N: int = 6,
+    h: int = 8,
+    dropout: float = 0.1,
+    d_ff: int = 2048,
+):
     # Create the embedding layers
     src_embed = InputEmbeddings(d_model, src_vocab_size)
    tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)
@@ -250,7 +284,9 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
     for _ in range(N):
         encoder_self_attention_block = MultiHeadAttention(d_model, h, dropout)
         feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)
-        encoder_block = EncoderBlock(encoder_self_attention_block, feed_forward_block, dropout)
+        encoder_block = EncoderBlock(
+            encoder_self_attention_block, feed_forward_block, dropout
+        )
         encoder_blocks.append(encoder_block)
 
     # Create the decoder blocks
@@ -259,7 +295,12 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
         decoder_self_attention_block = MultiHeadAttention(d_model, h, dropout)
         decoder_cross_attention_block = MultiHeadAttention(d_model, h, dropout)
         feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)
-        decoder_block = DecoderBlock(decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)
+        decoder_block = DecoderBlock(
+            decoder_self_attention_block,
+            decoder_cross_attention_block,
+            feed_forward_block,
+            dropout,
+        )
         decoder_blocks.append(decoder_block)
 
     # Create the encoder and decoder
@@ -270,7 +311,9 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
     projection_layer = ProjectionLayer(d_model, tgt_vocab_size)
 
     # Build the transformer
-    transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)
+    transformer = Transformer(
+        encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer
+    )
 
     # Initialize the parameters
     for p in transformer.parameters():
@@ -278,4 +321,3 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
         nn.init.xavier_uniform_(p)
 
     return transformer
-
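
For orientation, a hedged usage sketch of `build_transformer`; the vocabulary and sequence sizes below are placeholders, not values used anywhere in this repo:

```python
# Placeholder sizes; only the keyword names come from the signature above.
model = build_transformer(
    src_vocab_size=32000,
    tgt_vocab_size=32000,
    src_seq_len=350,
    tgt_seq_len=350,
)  # defaults apply: d_model=512, N=6, h=8, dropout=0.1, d_ff=2048

total = sum(p.numel() for p in model.parameters())
print(f"{total:,} parameters")
```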
 
transformer_from_scratch/train.py ADDED
@@ -0,0 +1,47 @@
+from pathlib import Path
+
+import torch
+import torch.nn as nn
+
+from datasets import load_dataset
+from tokenizers import Tokenizer
+from tokenizers.models import WordLevel
+from tokenizers.trainers import WordLevelTrainer
+from tokenizers.pre_tokenizers import Whitespace
+from torch.utils.data import Dataset, DataLoader, random_split
+
+
+def get_all_sentences(ds, lang):
+    for item in ds:
+        yield item["translation"][lang]
+
+
+def get_or_build_tokenizer(config, ds, lang):
+    # config['tokenizer_file'] = '../tokenizers/tokenizer_{0}.json'
+    tokenizer_path = Path(config["tokenizer_file"].format(lang))
+    if not Path.exists(tokenizer_path):
+        tokenizer = Tokenizer(WordLevel(unk_token="[UNK]"))
+        tokenizer.pre_tokenizer = Whitespace()
+        trainer = WordLevelTrainer(
+            special_tokens=["[UNK]", "[PAD]", "[SOS]", "[EOS]"], min_frequency=2
+        )  # bracketed [SOS]/[EOS], matching the token_to_id lookups in dataset.py
+        tokenizer.train_from_iterator(get_all_sentences(ds, lang), trainer=trainer)
+        tokenizer.save(str(tokenizer_path))
+    else:
+        tokenizer = Tokenizer.from_file(str(tokenizer_path))
+    return tokenizer
+
+
+def get_ds(config):
+    ds_raw = load_dataset(
+        "opus_books", f"{config['lang_src']}-{config['lang_tgt']}", split="train"
+    )
+
+    # Build tokenizers
+    tokenizer_src = get_or_build_tokenizer(config, ds_raw, config["lang_src"])
+    tokenizer_tgt = get_or_build_tokenizer(config, ds_raw, config["lang_tgt"])
+
+    # Keep 90% for training, 10% for validation
+    train_ds_size = int(0.9 * len(ds_raw))
+    val_ds_size = len(ds_raw) - train_ds_size
+    train_ds_raw, val_ds_raw = random_split(ds_raw, [train_ds_size, val_ds_size])
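
`get_ds` and `get_or_build_tokenizer` read several `config` keys, but no config file exists in this commit, and `get_ds` stops after the split with batching still to come. A hypothetical config, with keys inferred from the usage above and values that are pure assumptions:

```python
# Hypothetical config; keys inferred from get_ds/get_or_build_tokenizer, values assumed.
config = {
    "lang_src": "en",                        # source language of the opus_books pair
    "lang_tgt": "it",                        # target language
    "tokenizer_file": "tokenizer_{0}.json",  # per-language tokenizer path template
}

get_ds(config)  # downloads the pair, builds/loads both tokenizers, splits 90/10
```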