| import re | |
def remove_symbols(text):
    """Return *text* with everything except ASCII letters, digits and spaces removed.

    Each run of disallowed characters is deleted outright (replaced by the
    empty string), so punctuation, non-ASCII letters, and whitespace other
    than the plain space character (tabs, newlines) all disappear.
    """
    disallowed = r'[^A-Za-z0-9 ]+'
    return re.sub(disallowed, '', text)
def word_tokens(text):
    """Split *text* into a list of whitespace-separated word tokens.

    Consecutive whitespace characters are treated as a single separator and
    leading/trailing whitespace is ignored, so the result never contains
    empty strings. An empty or whitespace-only input yields an empty list.
    """
    # Splitting on r"\s" (one character at a time) would emit an empty token
    # between every pair of adjacent whitespace characters and at the edges;
    # split on whole runs instead and discard any empty edge fragments.
    return [token for token in re.split(r"\s+", text) if token]
| # Test | |
| # if __name__ == "__main__": | |
| # sample = 'goo very light lang..... ganda naman pero mas maganda ako!!!! hahahahahabababababababababababahahahahahahahahaha' | |
| # print("Original:", sample) | |
| # print("Cleaned:", remove_repetitive_symbols(sample)) | |
| # print("Tokens:", process_data(sample)) |