Spaces:
Sleeping
Sleeping
LuisDarioHinojosa committed on
Commit ·
25c2e0c
1
Parent(s): ade2dee
initial commit
Browse files- .gitattributes +1 -0
- app.py +61 -0
- assets/gpt_architecture_mk_VI.py +156 -0
- assets/gpt_dataset_mk_II.py +88 -0
- assets/training_data.txt +0 -0
- gpt_weights_trained_100_epochs.pth +3 -0
- requirements.txt +2 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
gpt_weights_trained_100_epochs.pth filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
import torch
from timeit import default_timer as timer

# import the language model and the dataset
from assets.gpt_dataset_mk_II import GptDatasetMKII
from assets.gpt_architecture_mk_VI import BigramLanguageModelMKVI

# inference is hardcoded to CPU
device = "cpu"

# model hyperparameters -- must match the values the checkpoint was trained with
BLOCK_SIZE = 256           # max sequence length for the context and target samples
EMBEDDING_DIMENTION = 384  # number of features used for the token representations
HEAD_SIZE = 32             # NOTE(review): unused here -- the architecture derives head size as EMBEDDING_DIMENTION // NUM_HEADS
NUM_HEADS = 12             # number of heads in each multi-head self-attention layer
DROPOUT_RATE = 0.2         # dropout rate for the architecture
NUM_BLOCKS = 12            # number of transformer blocks in the stack

# the dataset is instantiated only to rebuild the vocabulary and encode/decode tables
train_dataset = GptDatasetMKII("assets/training_data.txt", block_size=BLOCK_SIZE, tokenization_mode="shifted")

# instance the model
model = BigramLanguageModelMKVI(
    vocab_size=train_dataset.vocab_size,
    embedding_dimention=EMBEDDING_DIMENTION,
    block_size=BLOCK_SIZE,
    num_heads=NUM_HEADS,
    head_dropout=DROPOUT_RATE,
    device=device,
    num_blocks=NUM_BLOCKS,
).to(device)

# load the state dictionary
model.load_state_dict(torch.load("gpt_weights_trained_100_epochs.pth", map_location=torch.device('cpu')))
# FIX: switch to eval mode so the Dropout layers are disabled during generation;
# the original left the model in training mode, adding noise to every sample.
model.eval()


def generate_output(length):
    """Generate `length` new tokens and return (decoded_text, elapsed_seconds)."""
    start_time = timer()
    # FIX: inference_mode skips autograd bookkeeping -- generation needs no gradients
    with torch.inference_mode():
        context = torch.zeros((1, 1), dtype=torch.long).to(device)
        token_ids = model.generate(context=context, max_new_tokens=int(length))[0].tolist()
    output_sequence = train_dataset.decode(token_ids)
    end_time = timer()
    total_time = end_time - start_time
    return output_sequence, total_time


# gradio application metadata
title = "Shakespeare Text Generation"
description = "Model that generates text in the style of the writter William Shakespeare."
article = "The model is based on the transformer architecture originally published in \"[Attention Is All You Need](https://arxiv.org/abs/1706.03762) \" paper. It was trained on a dataset cotaining all the plays from William Shakespeare, and was implemented on Pytorch from scratch by myself. It is still imperfect, but i will update it as i work on it. The purpose was for me to get acquainted with transformers and sequence models."

# instance interface
demo = gr.Interface(
    fn=generate_output,
    inputs=[gr.Number(value=50, label="Sequence Length", info="Length of the sample sequence you wish to generate.")],
    outputs=[gr.TextArea(lines=5, label="Sequence Output"), gr.Number(label="Execution Time (seconds)")],
    title=title,
    article=article,
    description=description,
)

# launch interface
demo.launch()
|
assets/gpt_architecture_mk_VI.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
torch.manual_seed(4444)
|
| 6 |
+
|
| 7 |
+
class FeedForward(nn.Module):
    """Position-wise feed-forward sub-layer: Linear -> ReLU -> Linear -> Dropout.

    The hidden layer is 4x the embedding width, as in the original Transformer.
    """

    def __init__(self, embedding_dimention, dropout):
        super().__init__()
        self.embedding_dimention = embedding_dimention
        self.dropout = dropout
        hidden = 4 * embedding_dimention  # expansion factor of the inner layer
        self.net = nn.Sequential(
            nn.Linear(embedding_dimention, hidden),
            nn.ReLU(),
            nn.Linear(hidden, embedding_dimention),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Apply the MLP independently at every position of x."""
        return self.net(x)
|
| 20 |
+
|
| 21 |
+
# attention mechanism for a single head
|
| 22 |
+
class Head(nn.Module):
    """A single head of masked (causal) self-attention."""

    def __init__(self, head_size, block_size, embedding_dimention, dropout):
        super().__init__()
        # hyperparameters
        self.head_size = head_size
        self.block_size = block_size
        self.embedding_dimention = embedding_dimention
        self.dropout = dropout

        # linear projections for keys, queries and values (no bias)
        self.key = nn.Linear(self.embedding_dimention, self.head_size, bias=False)
        self.query = nn.Linear(self.embedding_dimention, self.head_size, bias=False)
        self.value = nn.Linear(self.embedding_dimention, self.head_size, bias=False)
        # lower-triangular mask buffer that hides future positions
        self.register_buffer('tril', torch.tril(torch.ones(self.block_size, self.block_size)))
        self.dropout_layer = nn.Dropout(self.dropout)

    def forward(self, x):
        _, T, C = x.shape
        k = self.key(x)
        q = self.query(x)

        # scaled dot-product attention scores
        # NOTE(review): the scale uses the input channel count C, not head_size
        # as in the paper -- preserved as-is to match the trained weights.
        scores = (q @ k.transpose(-2, -1)) * C ** -0.5
        # mask out future positions so each token only attends to the past
        scores = scores.masked_fill(self.tril[:T, :T] == 0, float("-inf"))
        scores = F.softmax(scores, dim=-1)  # rows sum to 1
        scores = self.dropout_layer(scores)

        # weighted aggregation of the value vectors
        return scores @ self.value(x)
|
| 53 |
+
|
| 54 |
+
# attention mechanism for multiple heads (may head layers placed in papalel)
|
| 55 |
+
class MulheadSelfAttention(nn.Module):
    """Several self-attention heads in parallel; outputs concatenated and projected.

    Expects num_heads * head_size == embedding_dimention so the concatenated
    head outputs fit the projection layer.

    FIX: the original forward computed `out` from the heads, then overwrote it
    with `self.projection(x)` and finally returned `x` -- the attention result
    was discarded entirely and the layer was an (almost) identity pass-through.
    NOTE(review): a checkpoint trained with the old forward will behave
    differently after this fix; the weights may need retraining.
    """

    def __init__(self, num_heads, head_size, block_size, embedding_dimention, dropout):
        super().__init__()
        # hyperparameters
        self.num_heads = num_heads
        self.head_size = head_size
        self.block_size = block_size
        self.embedding_dimention = embedding_dimention
        self.dropout = dropout
        # one Head per attention head; outputs are concatenated on the channel dim
        self.heads = nn.ModuleList([Head(self.head_size, self.block_size, self.embedding_dimention, self.dropout) for _ in range(self.num_heads)])
        # projection back into the residual pathway
        self.projection = nn.Linear(self.embedding_dimention, self.embedding_dimention)
        self.dropout_layer = nn.Dropout(self.dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout_layer(self.projection(out))  # was: self.projection(x)
        return out  # was: return x
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class Block(nn.Module):
    """One transformer block: pre-norm multi-head self-attention followed by a
    pre-norm feed-forward layer, each wrapped in a residual connection."""

    def __init__(self, embedding_dimention, num_heads, block_size, dropout):
        super().__init__()
        # hyperparameters
        self.num_heads = num_heads
        self.embedding_dimention = embedding_dimention
        # the embedding is split evenly across the heads
        self.head_size = self.embedding_dimention // self.num_heads
        self.block_size = block_size
        self.dropout = dropout
        self.layer_norm_1 = nn.LayerNorm(self.embedding_dimention)
        self.layer_norm_2 = nn.LayerNorm(self.embedding_dimention)

        # sub-layers
        self.sa_heads = MulheadSelfAttention(self.num_heads, self.head_size, self.block_size, self.embedding_dimention, self.dropout)
        self.feedfwrd = FeedForward(self.embedding_dimention, self.dropout)

    def forward(self, x):
        # residual connection around the attention sub-layer (pre-norm)
        attended = self.sa_heads(self.layer_norm_1(x))
        x = x + attended
        # residual connection around the feed-forward sub-layer (pre-norm)
        x = x + self.feedfwrd(self.layer_norm_2(x))
        return x
|
| 95 |
+
|
| 96 |
+
class BigramLanguageModelMKVI(nn.Module):
    """Character-level GPT-style decoder.

    Token + positional embeddings, a stack of transformer Blocks, a final
    LayerNorm and a linear language-model head over the vocabulary.

    Args:
        vocab_size: number of distinct tokens.
        embedding_dimention: width of the token/position embeddings.
        block_size: maximum context length.
        num_heads: attention heads per block.
        head_dropout: dropout rate used throughout the blocks.
        device: device on which positional index tensors are created.
        num_blocks: number of stacked transformer blocks.
    """

    def __init__(self, vocab_size, embedding_dimention, block_size, num_heads, head_dropout, device, num_blocks):
        super().__init__()
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.embedding_dimention = embedding_dimention
        self.head_dropout = head_dropout
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.device = device

        # embedding matrices for token identity and token position
        self.token_embedding = nn.Embedding(self.vocab_size, self.embedding_dimention)
        self.position_embedding = nn.Embedding(self.block_size, self.embedding_dimention)
        # stack of transformer blocks applied in sequence
        self.blocks = nn.Sequential(
            *[Block(self.embedding_dimention, self.num_heads, self.block_size, self.head_dropout) for _ in range(self.num_blocks)]
        )
        self.layer_norm = nn.LayerNorm(self.embedding_dimention)
        self.lm_head = nn.Linear(in_features=self.embedding_dimention, out_features=self.vocab_size)

    def forward(self, context, targets=None):
        """Return (logits, loss); loss is None when targets are not given.

        context: (batch, timesteps) token indices; timesteps <= block_size.
        targets: optional (batch, timesteps) next-token indices.
        """
        batch, timesteps = context.shape
        token_embedding = self.token_embedding(context)
        pos_embedding = self.position_embedding(torch.arange(timesteps, device=self.device))
        x = token_embedding + pos_embedding
        x = self.blocks(x)
        # FIX: the original called self.layer_norm(x) without assigning the
        # result back, so the final LayerNorm had no effect.
        # NOTE(review): outputs of checkpoints trained with the old code will
        # shift slightly after this fix.
        x = self.layer_norm(x)
        logits = self.lm_head(x)
        if targets is None:
            loss = None
        else:
            # flatten to (batch*timesteps, vocab) for cross-entropy
            batch, timesteps, channels = logits.shape
            logits = logits.view(batch * timesteps, channels)
            targets = targets.view(batch * timesteps)
            loss = F.cross_entropy(logits, targets)
        return logits, loss

    def generate(self, context, max_new_tokens):
        """Autoregressively sample max_new_tokens tokens, extending context.

        context: (batch, t) seed token indices; returns (batch, t + max_new_tokens).
        """
        for _ in range(max_new_tokens):
            # crop the context to the last block_size tokens
            context_condition = context[:, -self.block_size:]
            logits, loss = self(context_condition)
            # only the last timestep predicts the next token
            logits = logits[:, -1, :]
            # convert logits into a probability distribution and sample from it
            probs = F.softmax(logits, dim=-1)
            indx_next = torch.multinomial(probs, num_samples=1)
            context = torch.cat([context, indx_next], dim=1)
        return context
|
| 156 |
+
|
assets/gpt_dataset_mk_II.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torch.utils.data import Dataset
|
| 4 |
+
|
| 5 |
+
torch.manual_seed(4444)
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
This one addresses a problem where the construction of the features and labels as tensors fails due to a lackage
|
| 9 |
+
of samples given the specified sequence block size. The tensors will be constucting eliminating the last incomplete sequence
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
class GptDatasetMKII(Dataset):
    """Character-level dataset over a text corpus.

    **THIS CLASS DOES NOT SUPPORT TOKEN PADDING** -- a trailing block that
    cannot form a complete (context, label) pair is dropped.

    Inputs:
        target_dir: path of the file containing the text corpus
        encoding: text encoding used to read the corpus
        block_size: length of the sequence the model is fed at each batch iteration
        tokenization_mode: either "uniform" or "shifted"
            -> uniform: x is a tensor of shape [block_size] and y a scalar holding
               the index of the token that follows the last token of x
            -> shifted: x and y are both tensors of shape [block_size], with y
               shifted one position to the right, so y[i] is the index of the
               token that follows the sequence x[:i+1]
    """

    def __init__(self, target_dir, encoding="utf-8", block_size=8, tokenization_mode="uniform"):
        self.text_path = target_dir        # directory containing the text corpus
        self.encoding = encoding           # encoding used to read the text
        self.block_size = block_size       # sequence length used to parse the samples
        self.token_mode = tokenization_mode  # "uniform" or "shifted"
        # retrieve the text corpus (the with-statement closes the file;
        # the original's explicit f.close() inside the with-block was redundant)
        with open(target_dir, "r", encoding=self.encoding) as f:
            self.raw_text = f.read()
        self.corpus_size = len(self.raw_text)
        self.vocab = sorted(set(self.raw_text))   # all characters in the vocab
        self.vocab_size = len(self.vocab)
        self.sample_2_index = {ch: i for i, ch in enumerate(self.vocab)}  # char -> index
        self.index_2_sample = {i: ch for i, ch in enumerate(self.vocab)}  # index -> char
        self.encode = lambda s: [self.sample_2_index[c] for c in s]
        self.decode = lambda l: "".join([self.index_2_sample[i] for i in l])
        if self.token_mode == "uniform":
            self.uniform_tokenization_mode()
        else:
            self.shifted_tokenization_mode()

    def uniform_tokenization_mode(self):
        """Build x of shape [N, block_size] and y of shape [N] (next-token labels).

        FIX: the original indexed text_encoded[i + block_size] unconditionally,
        raising IndexError whenever corpus_size was an exact multiple of
        block_size; its bare-except fallback also assigned `labels = dataset[:-1]`,
        silently using the *inputs* as labels. Both are replaced by dropping any
        trailing block that has no following label token.
        """
        text_encoded = self.encode(self.raw_text)
        dataset = []
        labels = []
        for i in range(0, self.corpus_size, self.block_size):
            # need a full context AND one more token after it for the label
            if i + self.block_size >= self.corpus_size:
                break
            dataset.append(text_encoded[i:i + self.block_size])
            labels.append(text_encoded[i + self.block_size])
        self.x = torch.tensor(dataset, dtype=torch.long)
        self.y = torch.tensor(labels, dtype=torch.long)

    def shifted_tokenization_mode(self):
        """Build x and y both of shape [N, block_size], y shifted right by one.

        FIX: the original relied on torch.tensor() failing on a ragged final
        label and a bare except whose `labels = dataset[:-1]` replaced the
        labels with the inputs. The ragged trailing pair is now dropped up front.
        """
        text_encoded = self.encode(self.raw_text)
        dataset = []
        labels = []
        for i in range(0, self.corpus_size, self.block_size):
            target = text_encoded[i + 1:i + self.block_size + 1]
            # a full shifted target implies a full context block
            if len(target) < self.block_size:
                break
            dataset.append(text_encoded[i:i + self.block_size])
            labels.append(target)
        self.x = torch.tensor(dataset, dtype=torch.long)
        self.y = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        """Number of (context, label) samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return the (x, y) pair at `index`."""
        return self.x[index], self.y[index]
|
| 88 |
+
|
assets/training_data.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gpt_weights_trained_100_epochs.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29f7a980b1ece9b170b34304d3239dfe1fd8a76e7881d8161291e54076fdb5e0
|
| 3 |
+
size 123747781
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch==2.0.0
|
| 2 |
+
gradio==3.28.1
|