Spaces:

DarylT01
/

dfa-ui

Sleeping

dfa-ui / utils.py

fix(txt): add all texts, add utils for text processing

74e28e3 over 1 year ago

1.36 kB

	def file_preprocess(file_name: str) -> None:
	    """Clean up a word-list file in place.

	    The file is read as one word per line and then overwritten so that it
	    contains only the words made up entirely of the letters a-z (checked
	    case-insensitively), with duplicates removed and the survivors sorted
	    using the default string ordering. Words are joined with newlines and
	    no trailing newline is written.

	    Args:
	        file_name: Path of the text file to rewrite.
	    """
	    alphabets = set("abcdefghijklmnopqrstuvwxyz")

	    with open(file_name, "r") as file:
	        words = file.read().splitlines()

	    # Build the replacement content *before* reopening the file: mode "w"
	    # truncates immediately, so computing first keeps the original data
	    # intact if anything goes wrong while filtering.
	    #
	    # The explicit `word and` guard is required because all() is vacuously
	    # true for an empty string; without it blank lines would survive as an
	    # empty "word" and end up as a blank first line of the output.
	    cleaned = sorted(
	        {
	            word
	            for word in words
	            if word and all(char.lower() in alphabets for char in word)
	        }
	    )

	    with open(file_name, "w") as file:
	        file.write("\n".join(cleaned))


	def find_duplicates_in_files(file_names: list[str]) -> None:
	    """Print the words that occur more than once across the given files.

	    Every file is read as one word per line and all lines are pooled, so a
	    word counts as a duplicate whether it repeats inside a single file or
	    appears in several of them. The duplicates are printed as one sorted
	    list; an empty list is printed when there are none.

	    Args:
	        file_names: Paths of the text files to scan.
	    """
	    # Imported locally so the function does not depend on the module's
	    # import block.
	    from collections import Counter

	    occurrences = Counter()
	    for file_name in file_names:
	        with open(file_name, "r") as file:
	            occurrences.update(file.read().splitlines())

	    # One counting pass replaces calling list.count() once per word, which
	    # was quadratic in the total number of lines.
	    print(sorted(word for word, count in occurrences.items() if count > 1))


	# Script step: rewrite "adjectives.txt" in place through file_preprocess.
	# NOTE(review): this call is not behind an `if __name__ == "__main__":`
	# guard, so it appears to run whenever this file's top-level code executes
	# (including on import) - confirm that is intended.
	file_preprocess(file_name="adjectives.txt")
	# Optional cross-file duplicate check, currently disabled:
	# find_duplicates_in_files(["conjunctions.txt", "adverbs.txt", "adjectives.txt"])