Spaces:
Runtime error
Runtime error
| __copyright__ = "Copyright (C) 2023 Ali Mustapha" | |
| __license__ = "GPL-3.0-or-later" | |
| from unidecode import unidecode | |
| import pandas as od | |
| import regex | |
| import unicodedata | |
| import re | |
def is_most_common_char(s):
    """Check whether one Latin character accounts for more than 90% of ``s``.

    Characters are scanned left to right and the scan stops at the first
    decisive event:

    * ``False`` as soon as a character has no Unicode name or its name does
      not begin with ``LATIN``;
    * ``True`` as soon as the running count of any single character exceeds
      90% of ``len(s)``.

    Because of these early exits, a non-Latin character that appears only
    after the threshold has been crossed does not change the result.
    An empty string yields ``False``.
    """
    threshold = 0.90 * len(s)
    tally = {}
    for ch in s:
        # An unnamed code point yields "", which never starts with "LATIN".
        if not unicodedata.name(ch, "").startswith("LATIN"):
            return False
        occurrences = tally.get(ch, 0) + 1
        tally[ch] = occurrences
        if occurrences > threshold:
            return True
    return False
def find_common_item(list_array):
    """Pick the prevailing label (0, 1 or 2) among the items' first elements.

    Only the first element of each item in ``list_array`` is considered;
    values other than 0, 1 and 2 are ignored.

    Returns:
        2 when label 2 strictly outnumbers both 0 and 1, or when 0 and 1
        are tied (this includes empty input); otherwise whichever of 0 or 1
        occurs more often.

    NOTE(review): the original local names (m/f/u) suggest the labels mean
    male / female / unknown -- confirm against the callers.
    """
    labels = [entry[0] for entry in list_array]

    def occurrences(code):
        # Count the labels that compare equal to ``code``.
        return sum(1 for label in labels if label == code)

    zeros = occurrences(0)
    ones = occurrences(1)
    twos = occurrences(2)

    if twos > max(zeros, ones):
        return 2
    if zeros > ones:
        return 0
    if ones > zeros:
        return 1
    # 0 and 1 are tied: fall back to label 2.
    return 2
def is_roman_language(text):
    """Tell whether ``text`` is made up solely of Latin-script characters.

    The pattern needs at least one character, so an empty string gives
    ``False``; any character outside the Latin script (spaces, digits and
    punctuation included) also gives ``False``.
    """
    # \p{Latin} is a Unicode script class supported by the third-party
    # ``regex`` module; the standard ``re`` module does not understand it.
    return regex.match(r'^\p{Latin}+$', text, flags=regex.UNICODE) is not None
def text_to_romanize(text):
    """Return ``text`` as-is when it is already Latin-only, else transliterate.

    Text accepted by ``is_roman_language`` is returned unchanged; anything
    else is passed through ``unidecode`` and that result is returned.
    """
    return text if is_roman_language(text) else unidecode(text)
def is_alpha(s: str, min_alpha=0.60) -> bool:
    """Tell whether enough of ``s`` consists of letters or space separators.

    A character qualifies when its Unicode general category starts with
    ``L`` (any letter) or equals ``Zs`` (space separator).

    Args:
        s: The string to inspect.
        min_alpha: Minimum fraction of qualifying characters (inclusive).

    Returns:
        ``False`` for an empty string; otherwise whether the qualifying
        fraction of ``s`` is at least ``min_alpha``.
    """
    if len(s) == 0:
        return False

    qualifying = 0
    for ch in s:
        category = unicodedata.category(ch)
        if category.startswith("L") or category == "Zs":
            qualifying += 1
    return qualifying / len(s) >= min_alpha
def remove_spaces_from_ends(input_string):
    """Return ``input_string`` with leading and trailing whitespace removed.

    Whitespace inside the string is left untouched.

    ``str.strip()`` removes the same Unicode whitespace that the previous
    ``^\\s+|\\s+$`` regular expression matched, but it runs in linear time;
    the ``\\s+$`` alternative could backtrack quadratically on long interior
    whitespace runs.
    """
    return input_string.strip()