lennonssss committed on
Commit
4cff2f4
·
verified ·
1 Parent(s): 53fe091

Upload tp2_nlp_lennon_chaves.py

Files changed (1)
  1. tp2_nlp_lennon_chaves.py +166 -0
tp2_nlp_lennon_chaves.py ADDED
@@ -0,0 +1,166 @@
+ # -*- coding: utf-8 -*-
+ """TP2_NLP_Lennon_Chaves.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1ggDnqgrV0zUdbiI1exZQEjDT6ihRGlLY
+ """
+
+ # Environment setup
+
+ # Google Colaboratory configuration and installation of the required libraries
+
+ #!pip install torch transformers requests
+ #!pip install accelerate -U
+ #!pip install datasets
+
+ import torch
+ import torch.nn as nn
+ from transformers import GPT2Tokenizer, PreTrainedModel, PretrainedConfig
+ from torch.utils.data import Dataset, DataLoader
+ import requests
+ from datasets import load_dataset
+
+ # Data collection and preprocessing
+
+ # Load the TinyShakespeare dataset
+ dataset = load_dataset('tiny_shakespeare')
+ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+ # GPT-2 has no pad token; reuse the end-of-text token for padding.
+ tokenizer.pad_token = tokenizer.eos_token
+
+ # Tokenize and clean the data
+ def tokenize_function(examples):
+     return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)
+
+ tokenized_datasets = dataset.map(tokenize_function, batched=True)
+ tokenized_datasets.set_format(type='torch', columns=['input_ids'])
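+
+ # Quick sanity check (illustrative): tiny_shakespeare ships the full play as a
+ # single text row per split, so truncation keeps only its first 512 tokens.
+ # print(tokenized_datasets['train'][0]['input_ids'].shape)  # torch.Size([512])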
+
+ """**LLaMA 1 Architecture Configuration**
+
+ We implement the main components of the LLaMA 1 architecture: RMSNorm, SwiGLU,
+ and rotary embeddings, then define the full network in PyTorch.
+ """
+
+ # Configuration and model definition
+ class LLaMAConfig(PretrainedConfig):
+     model_type = "llama"
+
+     def __init__(self, vocab_size=50257, d_model=128, num_heads=4, num_layers=2, **kwargs):
+         self.vocab_size = vocab_size
+         self.d_model = d_model
+         self.num_heads = num_heads
+         self.num_layers = num_layers
+         super().__init__(**kwargs)
+
+ class LLaMAModel(PreTrainedModel):
+     config_class = LLaMAConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.embedding = nn.Embedding(config.vocab_size, config.d_model)
+         # batch_first=True so each layer accepts (batch, seq, d_model) tensors,
+         # matching the embedding output.
+         self.layers = nn.ModuleList([
+             nn.TransformerEncoderLayer(config.d_model, config.num_heads, batch_first=True)
+             for _ in range(config.num_layers)
+         ])
+         self.norm = RMSNorm(config.d_model)
+         self.swiglu = SwiGLU(config.d_model)
+         self.rotary_emb = RotaryEmbeddings(config.d_model)
+         self.fc = nn.Linear(config.d_model, config.vocab_size)
+         self.init_weights()
+
+     def forward(self, x):
+         x = self.embedding(x)
+         for layer in self.layers:
+             x = layer(x)
+         x = self.norm(x)
+         x = self.swiglu(x)
+         x = self.rotary_emb(x)
+         x = self.fc(x)
+         return x
+
+ class RMSNorm(nn.Module):
+     def __init__(self, d):
+         super().__init__()
+         self.scale = nn.Parameter(torch.ones(d))
+
+     def forward(self, x):
+         # RMSNorm divides by the root mean square of the features,
+         # not the L2 norm: rms = sqrt(mean(x^2)).
+         rms = torch.sqrt(torch.mean(x ** 2, dim=-1, keepdim=True))
+         return self.scale * x / (rms + 1e-6)
+
+ class SwiGLU(nn.Module):
+     def __init__(self, d):
+         super().__init__()
+         self.linear1 = nn.Linear(d, d)
+         self.linear2 = nn.Linear(d, d)
+         self.silu = nn.SiLU()
+
+     def forward(self, x):
+         # Gated activation: a linear branch modulated by a SiLU-gated branch.
+         return self.linear1(x) * self.silu(self.linear2(x))
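+
+ # Note: full-scale LLaMA uses three feed-forward projections (gate, up, down)
+ # with an expanded hidden size; the two-projection module above is a compact
+ # variant of the same gating idea.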
+
+ class RotaryEmbeddings(nn.Module):
+     def __init__(self, d):
+         super().__init__()
+         self.d = d
+
+     def forward(self, x):
+         # Simplified stand-in: applies cos/sin to the two halves of the feature
+         # dimension instead of rotating feature pairs by position-dependent
+         # angles, as true rotary embeddings do.
+         half_dim = self.d // 2
+         emb = torch.cat([torch.cos(x[:, :, :half_dim]), torch.sin(x[:, :, half_dim:])], dim=-1)
+         return emb
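+
+ # For reference, a minimal sketch of position-dependent rotary embeddings
+ # (an illustrative assumption, not part of the original notebook): feature
+ # pairs are rotated by angles that grow with token position.
+ def apply_rope_sketch(x):
+     # x: (batch, seq_len, d_model) with d_model even.
+     _, seq_len, d = x.shape
+     half = d // 2
+     freqs = 1.0 / (10000 ** (torch.arange(half, device=x.device).float() / half))
+     angles = torch.arange(seq_len, device=x.device).float()[:, None] * freqs[None, :]
+     cos, sin = torch.cos(angles), torch.sin(angles)  # each (seq_len, half)
+     x1, x2 = x[..., :half], x[..., half:]
+     # Rotate each (x1, x2) feature pair by its position-dependent angle.
+     return torch.cat([x1 * cos - x2 * sin, x1 * sin + x2 * cos], dim=-1)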
+
+ config = LLaMAConfig(vocab_size=tokenizer.vocab_size, d_model=128, num_heads=4, num_layers=2)
+ model = LLaMAModel(config)
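+
+ # Sanity check (illustrative): report the model size before training.
+ num_params = sum(p.numel() for p in model.parameters())
+ print(f"Model has {num_params:,} parameters")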
+
+ # Model training
+
+ # Hyperparameter settings
+ learning_rate = 5e-5
+ batch_size = 32
+ num_epochs = 100
+
+ optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+ criterion = nn.CrossEntropyLoss()
+
+ # Training function
+ def train(model, dataloader, optimizer, criterion, device):
+     model.train()
+     total_loss = 0
+     for batch in dataloader:
+         inputs = batch['input_ids'].to(device)
+         optimizer.zero_grad()
+         outputs = model(inputs)
+         # Next-token objective: logits at position t are scored against the
+         # token at position t + 1.
+         loss = criterion(outputs[:, :-1].reshape(-1, model.config.vocab_size),
+                          inputs[:, 1:].reshape(-1))
+         loss.backward()
+         optimizer.step()
+         total_loss += loss.item()
+     return total_loss / len(dataloader)
+
+ # DataLoader for the training split
+ train_dataloader = DataLoader(tokenized_datasets['train'], batch_size=batch_size, shuffle=True)
+
+ # Training loop
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ model.to(device)
+
+ for epoch in range(num_epochs):
+     loss = train(model, train_dataloader, optimizer, criterion, device)
+     print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}")
+
+ # Model evaluation
+
+ # Evaluation function
+ def evaluate(model, dataloader, criterion, device):
+     model.eval()
+     total_loss = 0
+     with torch.no_grad():
+         for batch in dataloader:
+             inputs = batch['input_ids'].to(device)
+             outputs = model(inputs)
+             # Same shifted next-token objective as in training.
+             loss = criterion(outputs[:, :-1].reshape(-1, model.config.vocab_size),
+                              inputs[:, 1:].reshape(-1))
+             total_loss += loss.item()
+     return total_loss / len(dataloader)
+
+ # DataLoader for the validation split
+ eval_dataloader = DataLoader(tokenized_datasets['validation'], batch_size=batch_size)
+
+ # Evaluation: perplexity is the exponential of the average cross-entropy loss.
+ eval_loss = evaluate(model, eval_dataloader, criterion, device)
+ perplexity = torch.exp(torch.tensor(eval_loss))
+ print(f"Validation Loss: {eval_loss:.4f}, Perplexity: {perplexity:.2f}")