import os
import time
import datetime

import pandas as pd
import seaborn as sns
import numpy as np
import random

import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler

from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from transformers import AdamW, get_linear_schedule_with_warmup

import nltk
nltk.download('punkt')

import sys

import pytz
IST = pytz.timezone('Asia/Kolkata')
stamp = datetime.datetime.now(IST).strftime("%c")

print('\n')
print('='*100)
print('='*100)
print('\t\t=Experiment6=', stamp)
print('='*100)
print('='*100)

out_path = '/media/data_dump/Ritwik/ggpt/'
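
# This script runs in two stages: stage 1 continues language-model training of GPT-2 small
# on the AI4Bharat English corpus, stage 2 continues on all_tc_sents_768.txt. Both stages
# checkpoint to out_path and can resume from a saved step/epoch; a generation demo runs at the end.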
|
|
|
|
hyper_params = {'rseed': 123}

import transformers
import psutil
os.environ['PYTHONHASHSEED'] = str(hyper_params['rseed'])

torch.manual_seed(hyper_params['rseed'])
torch.cuda.manual_seed(hyper_params['rseed'])
torch.cuda.manual_seed_all(hyper_params['rseed'])

np.random.seed(hyper_params['rseed'])
random.seed(hyper_params['rseed'])
transformers.set_seed(hyper_params['rseed'])

tokenizer = GPT2Tokenizer.from_pretrained('gpt2', bos_token='<|startoftext|>', eos_token='<|endoftext|>', pad_token='<|pad|>')
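
# <|startoftext|>, <|endoftext|> and <|pad|> are added on top of the stock GPT-2 vocabulary,
# which is why model.resize_token_embeddings(len(tokenizer)) is called once the model is built.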
|
|
sfile = '/media/nas_mount/Ritwik/Ai4Bharat_text_corpora/data/en/en_clean.txt'
print(sfile)
file = open(sfile, 'r')
lines = file.readlines()
file.close()
lines = [[x.strip()] for x in lines]

df = pd.DataFrame(lines, columns=['bio_main'])

print('Dataframe created')
df.dropna(inplace=True)
bios = df.bio_main.copy()
print(datetime.datetime.now(IST).strftime("%c"))
|
|
print("The max model length is {} for this model, although the actual embedding size for GPT small is 768".format(tokenizer.model_max_length))
print("The beginning of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.bos_token_id), tokenizer.bos_token_id))
print("The end of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.eos_token_id), tokenizer.eos_token_id))
print("The padding token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.pad_token_id), tokenizer.pad_token_id))
print(datetime.datetime.now(IST).strftime("%c"))
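
# Stage 1 uses a lazy Dataset: each corpus line is tokenized on demand in __getitem__,
# wrapped in <|startoftext|>/<|endoftext|>, and padded or truncated to max_length.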
|
|
batch_size = 8


class GPT2Dataset(Dataset):

    def __init__(self, txt_list, tokenizer, gpt2_type="gpt2", max_length=768):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.sents = list(txt_list)

    def __len__(self):
        return len(self.sents)

    def __getitem__(self, idx):
        txt = self.sents[idx]
        encodings_dict = self.tokenizer('<|startoftext|>' + txt + '<|endoftext|>', truncation=True, max_length=self.max_length, padding="max_length")
        input_ids = torch.tensor(encodings_dict['input_ids'])
        attn_masks = torch.tensor(encodings_dict['attention_mask'])
        return input_ids, attn_masks


dataset = GPT2Dataset(bios, tokenizer, max_length=500)
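
# Optional sanity check: each item is a pair of 1-D tensors of length max_length (500 here).
# ids, mask = dataset[0]
# print(ids.shape, mask.shape)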
|
|
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))
print(datetime.datetime.now(IST).strftime("%c"))

train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=batch_size
)

validation_dataloader = DataLoader(
    val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=batch_size
)
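
# RandomSampler shuffles the training batches each epoch; SequentialSampler keeps the
# validation order fixed.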
|
|
|
|
configuration = GPT2Config.from_pretrained('gpt2', output_hidden_states=False)

model = GPT2LMHeadModel.from_pretrained("gpt2", config=configuration)

model.resize_token_embeddings(len(tokenizer))

device = torch.device("cuda")

model = model.to(device)

print('Model loaded to GPU')
print(datetime.datetime.now(IST).strftime("%c"))
|
|
epochs = 1
learning_rate = 5e-4
warmup_steps = 1e2
epsilon = 1e-8

sample_every = 1000

optimizer = AdamW(model.parameters(),
                  lr=learning_rate,
                  eps=epsilon
                  )

total_steps = len(train_dataloader) * epochs

scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=warmup_steps,
                                            num_training_steps=total_steps)
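
# The schedule warms the learning rate up linearly for the first warmup_steps optimizer
# steps, then decays it linearly to zero over total_steps.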
|
|
|
|
|
|
|
|
def format_time(elapsed):
    return str(datetime.timedelta(seconds=int(round(elapsed))))


output_dir = '/media/data_dump/Ritwik/ggpt/model_save/'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

total_t0 = time.time()

training_stats = []

last_epoch, last_step = -1, -1
try:
    file = open(out_path + 'model_save/checkpoint_state_pretraining.txt', 'r')
    content = [x.split(':') for x in file.read().split('|')]
    file.close()
except Exception:
    content = []

if len(content) == 2:
    last_epoch = int(content[1][1])
    last_step = int(content[0][1])

    checkpoint = torch.load(out_path + 'model_save/model_checkpoint_pretraining.pth.tar')
    print(model.load_state_dict(checkpoint['state_dict']))
    tokenizer = torch.load(out_path + 'model_save/tokenizer_checkpoint_pretraining.pth.tar')
    print(datetime.datetime.now(IST).strftime("%c"))
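
# The state file stores 'step:<n>|epoch:<m>'. When it exists alongside the .pth.tar
# checkpoints, training resumes below by skipping epochs before last_epoch and steps
# up to last_step.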
|
|
|
|
for epoch_i in range(0, epochs):

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    if last_epoch != -1:
        if epoch_i < last_epoch:
            continue

    t0 = time.time()

    total_train_loss = 0

    model.train()

    for step, batch in enumerate(train_dataloader):

        if last_step != -1:
            if step <= last_step:
                continue

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        model.zero_grad()

        outputs = model(b_input_ids,
                        labels=b_labels,
                        attention_mask=b_masks,
                        token_type_ids=None
                        )
|
|
        loss = outputs[0]

        batch_loss = loss.item()
        total_train_loss += batch_loss

        if step % sample_every == 0 and not step == 0:

            elapsed = format_time(time.time() - t0)
            print(' Batch {:>5,} of {:>5,}. Loss: {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), batch_loss, elapsed))

            model.eval()

            sample_outputs = model.generate(
                bos_token_id=random.randint(1, 30000),
                do_sample=True,
                top_k=50,
                max_length=200,
                top_p=0.95,
                num_return_sequences=1
            )
            for i, sample_output in enumerate(sample_outputs):
                print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

            model.train()

            try:
                torch.save({'state_dict': model.state_dict()}, out_path + 'model_save/model_checkpoint_pretraining.pth.tar')
                torch.save(tokenizer, out_path + 'model_save/tokenizer_checkpoint_pretraining.pth.tar')
                file = open(out_path + 'model_save/checkpoint_state_pretraining.txt', 'w')
                file.write('step:' + str(step) + '|epoch:' + str(epoch_i))
                file.close()
            except Exception:
                torch.save({'state_dict': model.state_dict()}, out_path + 'model_save/model_checkpoint_pretraining.pth.tar')
                torch.save(tokenizer, out_path + 'model_save/tokenizer_checkpoint_pretraining.pth.tar')
                file = open(out_path + 'model_save/checkpoint_state_pretraining.txt', 'w')
                file.write('step:' + str(step) + '|epoch:' + str(epoch_i))
                file.close()

        loss.backward()

        optimizer.step()

        scheduler.step()
|
|
    last_epoch, last_step = -1, -1

    avg_train_loss = total_train_loss / len(train_dataloader)

    training_time = format_time(time.time() - t0)

    print("")
    print(" Average training loss: {0:.2f}".format(avg_train_loss))
    print(" Training epoch took: {:}".format(training_time))
    print(datetime.datetime.now(IST).strftime("%c"))
|
|
    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_loss = 0
    nb_eval_steps = 0

    for batch in validation_dataloader:

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        with torch.no_grad():

            outputs = model(b_input_ids,
                            attention_mask=b_masks,
                            labels=b_labels)

            loss = outputs[0]

        batch_loss = loss.item()
        total_eval_loss += batch_loss

    avg_val_loss = total_eval_loss / len(validation_dataloader)

    validation_time = format_time(time.time() - t0)

    print(" Validation Loss: {0:.2f}".format(avg_val_loss))
    print(" Validation took: {:}".format(validation_time))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
print(datetime.datetime.now(IST).strftime("%c"))
|
|
try:
    pd.set_option('display.precision', 2)

    df_stats = pd.DataFrame(data=training_stats)

    df_stats = df_stats.set_index('epoch')

    print(df_stats)

    sns.set(style='darkgrid')

    sns.set(font_scale=1.5)
    plt.rcParams["figure.figsize"] = (12, 6)

    plt.plot(df_stats['Training Loss'], 'b-o', label="Training")
    plt.plot(df_stats['Valid. Loss'], 'g-o', label="Validation")

    plt.title("Training & Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.xticks([1, 2, 3, 4])

    plt.savefig(out_path + "training.png")

    params = list(model.named_parameters())

    print('The GPT-2 model has {:} different named parameters.\n'.format(len(params)))

    print('==== Embedding Layer ====\n')

    for p in params[0:2]:
        print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

    print('\n==== First Transformer ====\n')

    for p in params[2:14]:
        print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

    print('\n==== Output Layer ====\n')

    for p in params[-2:]:
        print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

    print("Saving model to %s" % output_dir)

    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

except Exception as e:
    print(e)
    print('Waiting for 10 seconds')
    time.sleep(10)
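
# Stage 2: continue training the same in-memory model on all_tc_sents_768.txt, this time
# with a pre-tokenized dataset, max_length 768, batch_size 4 and 3 epochs.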
|
|
sfile = 'all_tc_sents_768.txt'
print(sfile)
file = open(sfile, 'r')
lines = file.readlines()
file.close()
lines = [[x.strip()] for x in lines]

df = pd.DataFrame(lines, columns=['bio_main'])

print('Dataframe created')
df.dropna(inplace=True)
bios = df.bio_main.copy()

doc_lengths = []
for bio in bios:
    tokens = nltk.word_tokenize(bio)
    doc_lengths.append(len(tokens))
doc_lengths = np.array(doc_lengths)
a = sns.distplot(doc_lengths)
a.get_figure().savefig(out_path + "out.png")
print('len(doc_lengths[doc_lengths > 768])/len(doc_lengths)', len(doc_lengths[doc_lengths > 768]) / len(doc_lengths))
print('np.average(doc_lengths)', np.average(doc_lengths))
print(datetime.datetime.now(IST).strftime("%c"))
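
# The fraction of documents longer than 768 word tokens is printed above; stage 2
# truncates/pads every example to max_length=768.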
|
|
|
|
| print("The max model length is {} for this model, although the actual embedding size for GPT small is 768".format(tokenizer.model_max_length)) |
| print("The beginning of sequence token {} token has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.bos_token_id), tokenizer.bos_token_id)) |
| print("The end of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.eos_token_id), tokenizer.eos_token_id)) |
| print("The padding token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.pad_token_id), tokenizer.pad_token_id)) |
| print(datetime.datetime.now(IST).strftime("%c")) |
|
|
| batch_size = 4 |
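
# Unlike stage 1, this Dataset tokenizes the whole corpus up front and keeps the resulting
# input_ids/attention_mask tensors in memory.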
|
|
class GPT2Dataset(Dataset):

    def __init__(self, txt_list, tokenizer, gpt2_type="gpt2", max_length=768):

        self.tokenizer = tokenizer
        self.input_ids = []
        self.attn_masks = []

        for txt in txt_list:

            encodings_dict = tokenizer('<|startoftext|>' + txt + '<|endoftext|>', truncation=True, max_length=max_length, padding="max_length")

            self.input_ids.append(torch.tensor(encodings_dict['input_ids']))
            self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attn_masks[idx]


dataset = GPT2Dataset(bios, tokenizer, max_length=768)
|
|
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))
print(datetime.datetime.now(IST).strftime("%c"))

train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=batch_size
)

validation_dataloader = DataLoader(
    val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=batch_size
)
|
|
'''
# I'm not really doing anything with the config here
configuration = GPT2Config.from_pretrained('gpt2', output_hidden_states=False)

# instantiate the model
model = GPT2LMHeadModel.from_pretrained("gpt2", config=configuration)

# this step is necessary because I've added some tokens (bos_token, etc) to the embeddings
# otherwise the tokenizer and model tensors won't match up
model.resize_token_embeddings(len(tokenizer))

# Tell pytorch to run this model on the GPU.
device = torch.device("cuda")

model = model.to(device)
'''

print('Model loaded to GPU')
print(datetime.datetime.now(IST).strftime("%c"))
|
|
epochs = 3
learning_rate = 5e-4
warmup_steps = 1e2
epsilon = 1e-8

sample_every = 1000

optimizer = AdamW(model.parameters(),
                  lr=learning_rate,
                  eps=epsilon
                  )

total_steps = len(train_dataloader) * epochs

scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=warmup_steps,
                                            num_training_steps=total_steps)
|
|
|
|
|
|
|
def format_time(elapsed):
    return str(datetime.timedelta(seconds=int(round(elapsed))))


output_dir = '/media/data_dump/Ritwik/ggpt/model_save/'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

total_t0 = time.time()

training_stats = []

last_epoch, last_step = -1, -1
try:
    file = open(out_path + 'model_save/checkpoint_state.txt', 'r')
    content = [x.split(':') for x in file.read().split('|')]
    file.close()
except Exception:
    content = []

if len(content) == 2:
    last_epoch = int(content[1][1])
    last_step = int(content[0][1])

    checkpoint = torch.load(out_path + 'model_save/model_checkpoint.pth.tar')
    print(model.load_state_dict(checkpoint['state_dict']))
    tokenizer = torch.load(out_path + 'model_save/tokenizer_checkpoint.pth.tar')
    print(datetime.datetime.now(IST).strftime("%c"))
|
|
|
|
for epoch_i in range(0, epochs):

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    if last_epoch != -1:
        if epoch_i < last_epoch:
            continue

    t0 = time.time()

    total_train_loss = 0

    model.train()

    for step, batch in enumerate(train_dataloader):

        if last_step != -1:
            if step <= last_step:
                continue

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        model.zero_grad()

        outputs = model(b_input_ids,
                        labels=b_labels,
                        attention_mask=b_masks,
                        token_type_ids=None
                        )
|
|
        loss = outputs[0]

        batch_loss = loss.item()
        total_train_loss += batch_loss

        if step % sample_every == 0 and not step == 0:

            elapsed = format_time(time.time() - t0)
            print(' Batch {:>5,} of {:>5,}. Loss: {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), batch_loss, elapsed))

            model.eval()

            sample_outputs = model.generate(
                bos_token_id=random.randint(1, 30000),
                do_sample=True,
                top_k=50,
                max_length=200,
                top_p=0.95,
                num_return_sequences=1
            )
            for i, sample_output in enumerate(sample_outputs):
                print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

            model.train()

            torch.save({'state_dict': model.state_dict()}, out_path + 'model_save/model_checkpoint.pth.tar')
            torch.save(tokenizer, out_path + 'model_save/tokenizer_checkpoint.pth.tar')
            file = open(out_path + 'model_save/checkpoint_state.txt', 'w')
            file.write('step:' + str(step) + '|epoch:' + str(epoch_i))
            file.close()

        loss.backward()

        optimizer.step()

        scheduler.step()
|
|
    last_epoch, last_step = -1, -1

    avg_train_loss = total_train_loss / len(train_dataloader)

    training_time = format_time(time.time() - t0)

    print("")
    print(" Average training loss: {0:.2f}".format(avg_train_loss))
    print(" Training epoch took: {:}".format(training_time))
    print(datetime.datetime.now(IST).strftime("%c"))
|
|
    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_loss = 0
    nb_eval_steps = 0

    for batch in validation_dataloader:

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        with torch.no_grad():

            outputs = model(b_input_ids,
                            attention_mask=b_masks,
                            labels=b_labels)

            loss = outputs[0]

        batch_loss = loss.item()
        total_eval_loss += batch_loss

    avg_val_loss = total_eval_loss / len(validation_dataloader)

    validation_time = format_time(time.time() - t0)

    print(" Validation Loss: {0:.2f}".format(avg_val_loss))
    print(" Validation took: {:}".format(validation_time))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
print(datetime.datetime.now(IST).strftime("%c"))
|
|
pd.set_option('display.precision', 2)

df_stats = pd.DataFrame(data=training_stats)

df_stats = df_stats.set_index('epoch')

print(df_stats)

sns.set(style='darkgrid')

sns.set(font_scale=1.5)
plt.rcParams["figure.figsize"] = (12, 6)

plt.plot(df_stats['Training Loss'], 'b-o', label="Training")
plt.plot(df_stats['Valid. Loss'], 'g-o', label="Validation")

plt.title("Training & Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.xticks([1, 2, 3, 4])

plt.savefig(out_path + "training.png")

params = list(model.named_parameters())

print('The GPT-2 model has {:} different named parameters.\n'.format(len(params)))

print('==== Embedding Layer ====\n')

for p in params[0:2]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print('\n==== First Transformer ====\n')

for p in params[2:14]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print('\n==== Output Layer ====\n')

for p in params[-2:]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print("Saving model to %s" % output_dir)

model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
|
|
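# Assumption: the model and tokenizer saved just above are re-loaded here before generation.
# The original reload code was elided; a minimal sketch (not the author's exact step):
#   model = GPT2LMHeadModel.from_pretrained(output_dir).to(device)
#   tokenizer = GPT2Tokenizer.from_pretrained(output_dir)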
|
|
|
print('Model and tokenizer loaded!')
print(datetime.datetime.now(IST).strftime("%c"))

model.eval()

prompt = "<|startoftext|> I wish to say that"

generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
generated = generated.to(device)

print(generated)

sample_outputs = model.generate(
    generated,
    do_sample=True,
    top_k=50,
    max_length=500,
    top_p=0.95,
    num_return_sequences=3
)

for i, sample_output in enumerate(sample_outputs):
    print("{}: {}\n\n".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

print(datetime.datetime.now(IST).strftime("%c"))
|
|