Spaces:

Momnadar
/

testDemo2

Sleeping

testDemo2 / app.py

Momnadar1

initial commit

473127e over 1 year ago

4.57 kB

	import torch
	import torch.nn as nn
	from torch.nn import functional as F
	import gradio as gr

	# --- Model shape -----------------------------------------------------------
	# Width of every token / position embedding vector.
	n_emb = 64
	# Number of positions covered by the positional table and the causal mask.
	block_size = 32
	# Attention heads per block; each head's size is derived as n_emb / num_heads.
	num_heads = 4
	# Number of stacked BlockSeq layers.
	n_x = 4

	# --- Remaining settings ----------------------------------------------------
	# Not read anywhere in the visible part of this file; presumably left over
	# from the training script -- confirm before removing.
	batch_size = 32
	max_iters = 5000
	eval_iteration = 250

	# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'


	class Head(nn.Module):
	    """A single head of causal (masked) self-attention."""

	    def __init__(self, head_size):
	        """Create the key/query/value projections from ``n_emb`` to ``head_size``."""
	        super().__init__()
	        self.key = nn.Linear(n_emb, head_size)
	        self.query = nn.Linear(n_emb, head_size)
	        self.value = nn.Linear(n_emb, head_size)
	        self.dropout = nn.Dropout(0.0)

	        # Lower-triangular matrix of ones: entry (i, j) is 1 when position i
	        # may look at position j. Stored as a buffer, not a parameter.
	        causal_mask = torch.tril(torch.ones(block_size, block_size))
	        self.register_buffer('tril', causal_mask)

	    def forward(self, x):
	        """Attend over the second axis of a 3-D input and return the mixed values."""
	        _, steps, channels = x.shape
	        q = self.query(x)
	        k = self.key(x)
	        # Scores are scaled by the *input* channel count (x.shape[-1]), not by
	        # the head size; kept exactly as written so results match the original.
	        scores = q @ k.transpose(-2, -1) * channels ** -0.5
	        # Positions where the mask is 0 lie in the future: push them to -inf so
	        # softmax assigns them zero weight.
	        is_future = self.tril[:steps, :steps] == 0
	        scores = scores.masked_fill(is_future, float('-inf'))
	        attention = self.dropout(F.softmax(scores, dim=-1))
	        return attention @ self.value(x)

	class MultiHeadAttention(nn.Module):
	    """Several attention heads applied to the same input, concatenated and mixed."""

	    def __init__(self, head_size, num_heads):
	        """Build ``num_heads`` heads of width ``head_size`` plus the output projection."""
	        super().__init__()
	        head_modules = [Head(head_size) for _ in range(num_heads)]
	        self.heads = nn.ModuleList(head_modules)
	        self.layer = nn.Linear(n_emb, n_emb)
	        self.dropout = nn.Dropout(0.0)

	    def forward(self, x):
	        """Run every head on ``x``, join the results on the last axis, then project."""
	        per_head = [head(x) for head in self.heads]
	        merged = torch.cat(per_head, dim=-1)
	        projected = self.layer(merged)
	        return self.dropout(projected)


	class FeedForward(nn.Module):
	    """Two-layer MLP: widen to 4x the embedding size, ReLU, project back."""

	    def __init__(self, n_emb):
	        """Build the MLP for inputs whose last dimension is ``n_emb``."""
	        super().__init__()
	        hidden = 4 * n_emb
	        widen = nn.Linear(n_emb, hidden)
	        narrow = nn.Linear(hidden, n_emb)
	        self.dff = nn.Sequential(widen, nn.ReLU(), narrow, nn.Dropout(0.0))

	    def forward(self, x):
	        """Apply the MLP to ``x``."""
	        return self.dff(x)

	class BlockSeq(nn.Module):
	    """One pre-norm transformer block: attention then feed-forward, each with a residual."""

	    def __init__(self, n_emb, num_heads):
	        """Split ``n_emb`` across ``num_heads`` heads and build both sub-layers."""
	        super().__init__()
	        per_head_width = int(n_emb / num_heads)
	        self.mh_att = MultiHeadAttention(per_head_width, num_heads)
	        self.ff_lay = FeedForward(n_emb)
	        self.ln1 = nn.LayerNorm(n_emb)
	        self.ln2 = nn.LayerNorm(n_emb)

	    def forward(self, x):
	        """Return ``x`` after the attention and feed-forward residual updates."""
	        # LayerNorm is applied *before* each sub-layer; the additions are kept
	        # out-of-place so the caller's tensor is never modified.
	        attended = self.mh_att(self.ln1(x))
	        x = x + attended
	        transformed = self.ff_lay(self.ln2(x))
	        return x + transformed


	class TextGenerator(nn.Module):
	    """Autoregressive language model over integer token ids.

	    Token embeddings and learned positional embeddings are summed, passed
	    through ``n_x`` stacked ``BlockSeq`` layers and a final LayerNorm, then
	    projected to one logit per vocabulary entry.

	    NOTE(review): ``__init__`` reads a module-level ``vocab_size`` that is not
	    defined in the visible part of this file, so direct construction needs
	    that name to exist. Unpickling does not run ``__init__``, so a model
	    loaded with ``torch.load`` is unaffected.
	    """

	    def __init__(self):
	        super().__init__()
	        self.lookup_token_emd_table = nn.Embedding(vocab_size, n_emb)
	        self.postional_encoding = nn.Embedding(block_size, n_emb)
	        self.blocks = nn.Sequential(*[BlockSeq(n_emb, num_heads) for _ in range(n_x)])
	        self.layer_norm = nn.LayerNorm(n_emb)
	        self.model_head = nn.Linear(n_emb, vocab_size)

	    def forward(self, x, y=None):
	        """Compute logits and, when targets are supplied, the cross-entropy loss.

	        Args:
	            x: 2-D tensor of token ids, (batch, time). ``time`` must not exceed
	                the number of rows in the positional embedding table.
	            y: optional target token ids with as many elements as ``x``.

	        Returns:
	            ``(logits, loss)`` where ``logits`` is (batch, time, vocab) and
	            ``loss`` is ``None`` when ``y`` is ``None``.
	        """
	        _, seq_len = x.shape
	        token_emb = self.lookup_token_emd_table(x)
	        # Build the position ids on the input's own device rather than the
	        # module-level ``device``; the two can differ (e.g. CUDA is available
	        # but the model and prompt live on the CPU), which would raise a
	        # device-mismatch error in the embedding lookup.
	        positions = torch.arange(seq_len, device=x.device)
	        out = token_emb + self.postional_encoding(positions)
	        out = self.blocks(out)
	        out = self.layer_norm(out)
	        logits = self.model_head(out)

	        if y is None:
	            return logits, None

	        batches, steps, n_classes = logits.shape
	        # cross_entropy wants (N, C) logits and (N,) targets, so fold the batch
	        # and time axes together. (The original referenced an undefined name
	        # ``batchesblock_size`` here -- the ``*`` had been lost.)
	        loss = F.cross_entropy(
	            logits.view(batches * steps, n_classes),
	            y.view(batches * steps),
	        )
	        return logits, loss

	    def generate(self, x, max_tokens=200):
	        """Sample ``max_tokens`` new tokens and append them to ``x``.

	        Args:
	            x: 2-D tensor of token ids, (batch, time), with time >= 1.
	            max_tokens: number of tokens to sample.

	        Returns:
	            Tensor of shape (batch, time + max_tokens): the prompt followed by
	            the sampled continuation.
	        """
	        # Crop to the model's own context window (rows of the positional
	        # table) so generation stays valid even if the module-level
	        # ``block_size`` differs from the value the model was built with.
	        context = self.postional_encoding.num_embeddings
	        for _ in range(max_tokens):
	            logits, _ = self(x[:, -context:])
	            # Only the last time step predicts the next token.
	            last_logits = logits[:, -1, :]
	            probs = F.softmax(last_logits, dim=-1)
	            next_token = torch.multinomial(probs, num_samples=1)
	            x = torch.cat((x, next_token), dim=1)
	        return x


	# The checkpoint is a whole pickled nn.Module, not a state_dict, so:
	#   * weights_only=False is required -- recent PyTorch releases default to
	#     weights_only=True, which refuses to unpickle arbitrary module classes;
	#   * map_location remaps tensors saved on a GPU onto the device chosen above,
	#     so a CUDA-trained checkpoint still loads on a CPU-only host.
	# SECURITY: this unpickles arbitrary objects; only load files you trust.
	model = torch.load('entire_model.pth', map_location=device, weights_only=False)
	# Inference only: switch off training-time behaviour such as dropout.
	model.eval()

	import pickle

	# SECURITY: pickle.load executes code from the file; meta.pkl must be trusted.
	with open('meta.pkl', 'rb') as f:
	    meta = pickle.load(f)
	stoi, itos = meta['stoi'], meta['itos']


	def encode(s):
	    """Map each character of ``s`` to its id via ``stoi``; returns a list."""
	    return [stoi[c] for c in s]


	def decode(l):
	    """Map each id in ``l`` back through ``itos``; returns a list, not a str."""
	    return [itos[i] for i in l]


	def reply(message, history=None):
	    """Generate three sampled continuations of ``message``, one per line.

	    Args:
	        message: prompt text typed by the user.
	        history: unused. It defaults to ``None`` because ``gr.Interface`` with
	            a single text input calls this function with one argument only;
	            the parameter is kept so chat-style callers that pass a history
	            still work.

	    Returns:
	        The three generated texts (each starts with the prompt) joined by
	        newlines, or a short explanatory message when the prompt cannot be
	        used.
	    """
	    # An empty prompt would produce a zero-length sequence, and generate()
	    # indexes the last time step of the logits, which would fail.
	    if not message:
	        return "Please enter some text to start from."

	    try:
	        start_ids = encode(message)
	    except KeyError as err:
	        # encode() looks every character up in the vocabulary table; report a
	        # character it does not contain instead of surfacing a traceback.
	        return f"Unsupported character in prompt: {err.args[0]!r}"

	    # Put the prompt on the same device as the model's weights instead of
	    # assuming 'cpu'.
	    model_device = next(model.parameters()).device
	    x = torch.tensor(start_ids, dtype=torch.long, device=model_device)[None, ...]

	    # Sampling only -- no gradients needed.
	    with torch.no_grad():
	        samples = [
	            ''.join(decode(model.generate(x, 200)[0].tolist()))
	            for _ in range(3)
	        ]
	    return '\n'.join(samples)


	# Single text box in, single text box out; launch() starts the web server.
	demo = gr.Interface(fn=reply, inputs="text", outputs="text", title="Poet Demo")
	demo.launch()