Spaces:

vishnuraggav
/

LanguageTranslation

Sleeping

LanguageTranslation / utils.py

First

ed8878f over 1 year ago

1.62 kB

	# Indices reserved for the three special vocabulary entries
	# (the "SOS", "EOS" and "UNK" markers).
	SOS_token, EOS_token, UNK_token = 0, 1, 2

	class Lang:
	    """Vocabulary of a single language.

	    Keeps a word -> index map, the inverse index -> word map, a per-word
	    occurrence counter and the running vocabulary size. Indices 0, 1 and 2
	    are pre-assigned to the "SOS", "EOS" and "UNK" entries, so the first
	    real word receives index 3.
	    """

	    def __init__(self, name):
	        """Create an empty vocabulary.

	        Args:
	            name: Label of the language. The exact value 'English' switches
	                on contraction expansion in addSentence.
	        """
	        self.name = name
	        self.word_to_index = {"SOS": 0, "EOS": 1, "UNK": 2}
	        self.index_to_word = {0: "SOS", 1: "EOS", 2: "UNK"}
	        self.word_to_count = {}
	        self.n_words = 3

	        # Every run of non-word characters, whitespace or digits inside a
	        # token is replaced by a single space before the token is stored.
	        self.regex_pattern = r"[\W\s\d]+"

	        # Laid out as (full form, contraction) pairs: each even position
	        # holds the expansion of the contraction that follows it. Every
	        # entry needs its own trailing comma -- two adjacent string
	        # literals are silently concatenated into one.
	        self.eng_prefixes = [
	            "i will", "i ll",
	            "i am ", "i m",
	            "i have", "i ve",
	            "he is", "he s",
	            "she is", "she s",
	            "you are", "you re",
	            "we are", "we re",
	            "they are", "they re",
	            "i did", "i d",
	        ]

	    def addSentence(self, sentence):
	        """Add every token of ``sentence`` to the vocabulary.

	        The sentence is lower-cased and split on single spaces. Each token
	        is cleaned with ``regex_pattern`` and stripped; tokens that end up
	        empty are skipped. For an 'English' vocabulary, a cleaned token
	        that is listed in ``eng_prefixes`` is replaced by its full form
	        (when it is a contraction) and added word by word. Any other
	        token is added as a single entry, even if cleaning left a space
	        inside it (e.g. "don't" becomes the entry "don t").

	        Args:
	            sentence: Text to tokenize and register.
	        """
	        # Local import: no module-level ``import re`` is visible in this file.
	        import re

	        expand_contractions = self.name == 'English'

	        for raw_token in sentence.lower().split(' '):
	            word = re.sub(self.regex_pattern, ' ', raw_token).strip()
	            if not word:
	                continue

	            if expand_contractions and word in self.eng_prefixes:
	                position = self.eng_prefixes.index(word)
	                # Only contractions (odd positions) are swapped for the
	                # full form stored right before them; a full form that
	                # matches directly is kept as it is.
	                if position % 2 == 1:
	                    word = self.eng_prefixes[position - 1]
	                for subword in word.split(' '):
	                    # Skips the empty piece produced by a trailing space
	                    # such as the one in "i am ".
	                    if subword:
	                        self.addWord(subword)
	            else:
	                self.addWord(word)

	    def addWord(self, word):
	        """Register one occurrence of ``word``.

	        An unseen word gets the next free index (``n_words``) and a count
	        of 1; a known word only has its count incremented.

	        Args:
	            word: Vocabulary entry to add or count.
	        """
	        if word in self.word_to_index:
	            self.word_to_count[word] += 1
	            return

	        self.word_to_index[word] = self.n_words
	        self.index_to_word[self.n_words] = word
	        self.word_to_count[word] = 1
	        self.n_words += 1