Spaces:
Runtime error
Runtime error
File size: 2,032 Bytes
9ec8f41 de491c8 9ec8f41 de491c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"
from unidecode import unidecode
import pandas as od
import regex
import unicodedata
import re
def is_most_common_char(s):
    """Tell whether one Latin character accounts for more than 90% of ``s``.

    Characters are scanned left to right while a per-character tally is kept.
    The scan stops with ``False`` at the first character whose Unicode name is
    missing or does not begin with ``LATIN``, and stops with ``True`` as soon
    as any tally exceeds ``len(s) * 0.90``. Both exits happen during the scan,
    so a non-Latin character only forces ``False`` when it is reached before
    some tally has crossed the threshold.

    Returns ``False`` for an empty string, and when the scan finishes without
    any character crossing the threshold.
    """
    threshold = len(s) * 0.90
    tally = {}
    for ch in s:
        # An unnamed character yields "", which fails the prefix test as well.
        if not unicodedata.name(ch, "").startswith('LATIN'):
            return False
        seen = tally.get(ch, 0) + 1
        tally[ch] = seen
        if seen > threshold:
            return True
    return False
def find_common_item(list_array):
    """Pick the winning label (0, 1 or 2) from the first element of each item.

    Args:
        list_array: Iterable of indexable items; only ``item[0]`` is read.
            Labels other than 0, 1 and 2 are ignored.
            NOTE(review): the original counter names (m/f/u) suggest
            0 = male, 1 = female, 2 = unknown -- confirm against the caller.

    Returns:
        2 when label 2 is strictly more frequent than both 0 and 1;
        otherwise 0 or 1, whichever of the two is strictly more frequent;
        2 when labels 0 and 1 are tied (including for empty input).
    """
    labels = [pair[0] for pair in list_array]
    m_count = labels.count(0)
    f_count = labels.count(1)
    u_count = labels.count(2)

    if u_count > max(m_count, f_count):
        return 2
    if m_count > f_count:
        return 0
    if f_count > m_count:
        return 1
    # Labels 0 and 1 are tied, so no winner can be chosen between them.
    return 2
def is_roman_language(text):
    """Return True when ``text`` is made up solely of Latin-script characters.

    The whole string is tested against a pattern that allows one or more
    characters carrying the Unicode Latin script property and nothing else,
    so an empty string does not match. Matching is done with the third-party
    ``regex`` module, which supports Unicode script properties.
    """
    latin_only = r'^\p{Latin}+$'
    return regex.match(latin_only, text, flags=regex.UNICODE) is not None
def text_to_romanize(text):
    """Return ``text`` as-is if it is Latin-only, else its ``unidecode`` form.

    ``is_roman_language`` decides which path is taken: text it accepts is
    returned untouched, anything else is passed through ``unidecode``.
    """
    return text if is_roman_language(text) else unidecode(text)
def is_alpha(s: str, min_alpha: float = 0.60) -> bool:
    """Check whether enough of ``s`` consists of letters or space separators.

    A character counts when its Unicode general category starts with ``L``
    (any letter) or equals ``Zs`` (space separator).

    Args:
        s: Text to inspect.
        min_alpha: Minimum fraction of counted characters, compared
            inclusively (``>=``) against the observed fraction.

    Returns:
        ``False`` for an empty ``s``; otherwise whether the counted fraction
        of ``s`` reaches ``min_alpha``.
    """
    if not s:
        return False

    alpha_chars = 0
    for ch in s:
        # Look the category up once per character and reuse it for both tests.
        category = unicodedata.category(ch)
        if category == "Zs" or category.startswith("L"):
            alpha_chars += 1
    return alpha_chars / len(s) >= min_alpha
def remove_spaces_from_ends(input_string):
    """Return ``input_string`` with leading and trailing whitespace removed.

    Interior whitespace is left untouched. ``str.strip()`` is used instead of
    an anchored regex substitution: the regex retries its trailing-whitespace
    branch at every position inside long interior whitespace runs, which is
    quadratic in the run length, while ``strip()`` is a single linear pass
    over the two ends.
    """
    return input_string.strip()